Port folder renaming changes from AOM

Manually cherry-picked commits:
ceef058 libvpx->libaom part2
3d26d91 libvpx -> libaom
cfea7dd vp10/ -> av1/
3a8eff7 Fix a build issue for a test
bf4202e Rename vpx to aom

Change-Id: I1b0eb5a40796e3aaf41c58984b4229a439a597dc
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
new file mode 100644
index 0000000..b6ff12a
--- /dev/null
+++ b/av1/common/alloccommon.c
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "aom_mem/vpx_mem.h"
+
+#include "av1/common/alloccommon.h"
+#include "av1/common/blockd.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/entropymv.h"
+#include "av1/common/onyxc_int.h"
+
+void vp10_set_mb_mi(VP10_COMMON *cm, int width, int height) {
+  const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
+  const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
+
+  cm->mi_cols = aligned_width >> MI_SIZE_LOG2;
+  cm->mi_rows = aligned_height >> MI_SIZE_LOG2;
+  cm->mi_stride = calc_mi_size(cm->mi_cols);
+
+  cm->mb_cols = (cm->mi_cols + 1) >> 1;
+  cm->mb_rows = (cm->mi_rows + 1) >> 1;
+  cm->MBs = cm->mb_rows * cm->mb_cols;
+}
+
+static int alloc_seg_map(VP10_COMMON *cm, int seg_map_size) {
+  int i;
+
+  for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
+    cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1);
+    if (cm->seg_map_array[i] == NULL) return 1;
+  }
+  cm->seg_map_alloc_size = seg_map_size;
+
+  // Init the index.
+  cm->seg_map_idx = 0;
+  cm->prev_seg_map_idx = 1;
+
+  cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
+  if (!cm->frame_parallel_decode)
+    cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
+
+  return 0;
+}
+
+static void free_seg_map(VP10_COMMON *cm) {
+  int i;
+
+  for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
+    vpx_free(cm->seg_map_array[i]);
+    cm->seg_map_array[i] = NULL;
+  }
+
+  cm->current_frame_seg_map = NULL;
+
+  if (!cm->frame_parallel_decode) {
+    cm->last_frame_seg_map = NULL;
+  }
+}
+
+void vp10_free_ref_frame_buffers(BufferPool *pool) {
+  int i;
+
+  for (i = 0; i < FRAME_BUFFERS; ++i) {
+    if (pool->frame_bufs[i].ref_count > 0 &&
+        pool->frame_bufs[i].raw_frame_buffer.data != NULL) {
+      pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer);
+      pool->frame_bufs[i].ref_count = 0;
+    }
+    vpx_free(pool->frame_bufs[i].mvs);
+    pool->frame_bufs[i].mvs = NULL;
+    vpx_free_frame_buffer(&pool->frame_bufs[i].buf);
+  }
+}
+
+#if CONFIG_LOOP_RESTORATION
+void vp10_free_restoration_buffers(VP10_COMMON *cm) {
+  vpx_free(cm->rst_info.bilateral_level);
+  cm->rst_info.bilateral_level = NULL;
+  vpx_free(cm->rst_info.vfilter);
+  cm->rst_info.vfilter = NULL;
+  vpx_free(cm->rst_info.hfilter);
+  cm->rst_info.hfilter = NULL;
+  vpx_free(cm->rst_info.wiener_level);
+  cm->rst_info.wiener_level = NULL;
+}
+#endif  // CONFIG_LOOP_RESTORATION
+
+void vp10_free_context_buffers(VP10_COMMON *cm) {
+  int i;
+  cm->free_mi(cm);
+  free_seg_map(cm);
+  for (i = 0; i < MAX_MB_PLANE; i++) {
+    vpx_free(cm->above_context[i]);
+    cm->above_context[i] = NULL;
+  }
+  vpx_free(cm->above_seg_context);
+  cm->above_seg_context = NULL;
+#if CONFIG_VAR_TX
+  vpx_free(cm->above_txfm_context);
+  cm->above_txfm_context = NULL;
+#endif
+}
+
+int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
+  int new_mi_size;
+
+  vp10_set_mb_mi(cm, width, height);
+  new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
+  if (cm->mi_alloc_size < new_mi_size) {
+    cm->free_mi(cm);
+    if (cm->alloc_mi(cm, new_mi_size)) goto fail;
+  }
+
+  if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) {
+    // Create the segmentation map structure and set to 0.
+    free_seg_map(cm);
+    if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) goto fail;
+  }
+
+  if (cm->above_context_alloc_cols < cm->mi_cols) {
+    // TODO(geza.lore): These are bigger than they need to be.
+    // cm->tile_width would be enough but it complicates indexing a
+    // little elsewhere.
+    const int aligned_mi_cols =
+        ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+    int i;
+
+    for (i = 0; i < MAX_MB_PLANE; i++) {
+      vpx_free(cm->above_context[i]);
+      cm->above_context[i] = (ENTROPY_CONTEXT *)vpx_calloc(
+          2 * aligned_mi_cols, sizeof(*cm->above_context[0]));
+      if (!cm->above_context[i]) goto fail;
+    }
+
+    vpx_free(cm->above_seg_context);
+    cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc(
+        aligned_mi_cols, sizeof(*cm->above_seg_context));
+    if (!cm->above_seg_context) goto fail;
+
+#if CONFIG_VAR_TX
+    vpx_free(cm->above_txfm_context);
+    cm->above_txfm_context = (TXFM_CONTEXT *)vpx_calloc(
+        aligned_mi_cols, sizeof(*cm->above_txfm_context));
+    if (!cm->above_txfm_context) goto fail;
+#endif
+
+    cm->above_context_alloc_cols = aligned_mi_cols;
+  }
+
+  return 0;
+
+fail:
+  // clear the mi_* values to force a realloc on resync
+  vp10_set_mb_mi(cm, 0, 0);
+  vp10_free_context_buffers(cm);
+  return 1;
+}
+
+void vp10_remove_common(VP10_COMMON *cm) {
+  vp10_free_context_buffers(cm);
+
+  vpx_free(cm->fc);
+  cm->fc = NULL;
+  vpx_free(cm->frame_contexts);
+  cm->frame_contexts = NULL;
+}
+
+void vp10_init_context_buffers(VP10_COMMON *cm) {
+  cm->setup_mi(cm);
+  if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
+    memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
+}
+
+void vp10_swap_current_and_last_seg_map(VP10_COMMON *cm) {
+  // Swap indices.
+  const int tmp = cm->seg_map_idx;
+  cm->seg_map_idx = cm->prev_seg_map_idx;
+  cm->prev_seg_map_idx = tmp;
+
+  cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
+  cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
+}
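
Note on vp10_set_mb_mi() above: frame dimensions are rounded up to whole
mode-info (MI) units, and 16x16 macroblock counts are derived by halving the
MI counts with round-up. A worked sketch of that arithmetic for a 1080p
frame, assuming MI_SIZE_LOG2 == 3 (8x8-pixel MI units, as in the VP9-derived
code):

/* Hypothetical standalone sketch; MI_SIZE_LOG2 and the macro mirror the
 * codebase definitions under the assumption stated above. */
#include <stdio.h>

#define MI_SIZE_LOG2 3
#define ALIGN_POWER_OF_TWO(value, n) \
  (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))

int main(void) {
  const int width = 1920, height = 1080;
  const int mi_cols = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2;
  const int mi_rows = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2;
  /* Two MI units per macroblock side, rounding up for odd MI counts. */
  const int mb_cols = (mi_cols + 1) >> 1;
  const int mb_rows = (mi_rows + 1) >> 1;
  /* Prints: mi 240x135, mb 120x68, MBs 8160 */
  printf("mi %dx%d, mb %dx%d, MBs %d\n", mi_cols, mi_rows, mb_cols, mb_rows,
         mb_rows * mb_cols);
  return 0;
}
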
diff --git a/av1/common/alloccommon.h b/av1/common/alloccommon.h
new file mode 100644
index 0000000..d2d2643
--- /dev/null
+++ b/av1/common/alloccommon.h
@@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ALLOCCOMMON_H_
+#define VP10_COMMON_ALLOCCOMMON_H_
+
+#define INVALID_IDX -1  // Invalid buffer index.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Common;
+struct BufferPool;
+
+void vp10_remove_common(struct VP10Common *cm);
+
+int vp10_alloc_context_buffers(struct VP10Common *cm, int width, int height);
+void vp10_init_context_buffers(struct VP10Common *cm);
+void vp10_free_context_buffers(struct VP10Common *cm);
+
+void vp10_free_ref_frame_buffers(struct BufferPool *pool);
+#if CONFIG_LOOP_RESTORATION
+void vp10_free_restoration_buffers(struct VP10Common *cm);
+#endif  // CONFIG_LOOP_RESTORATION
+
+int vp10_alloc_state_buffers(struct VP10Common *cm, int width, int height);
+void vp10_free_state_buffers(struct VP10Common *cm);
+
+void vp10_set_mb_mi(struct VP10Common *cm, int width, int height);
+
+void vp10_swap_current_and_last_seg_map(struct VP10Common *cm);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_ALLOCCOMMON_H_
diff --git a/av1/common/ans.h b/av1/common/ans.h
new file mode 100644
index 0000000..c974ada
--- /dev/null
+++ b/av1/common/ans.h
@@ -0,0 +1,414 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ANS_H_
+#define VP10_COMMON_ANS_H_
+// An implementation of Asymmetric Numeral Systems
+// http://arxiv.org/abs/1311.2540v2
+
+#include <assert.h>
+#include "./vpx_config.h"
+#include "aom/vpx_integer.h"
+#include "aom_dsp/prob.h"
+#include "aom_ports/mem_ops.h"
+
+#define ANS_DIVIDE_BY_MULTIPLY 1
+#if ANS_DIVIDE_BY_MULTIPLY
+#include "av1/common/divide.h"
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+  do {                                                     \
+    quotient = fastdiv(dividend, divisor);                 \
+    remainder = dividend - quotient * divisor;             \
+  } while (0)
+#define ANS_DIV(dividend, divisor) fastdiv(dividend, divisor)
+#else
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+  do {                                                     \
+    quotient = dividend / divisor;                         \
+    remainder = dividend % divisor;                        \
+  } while (0)
+#define ANS_DIV(dividend, divisor) ((dividend) / (divisor))
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+struct AnsCoder {
+  uint8_t *buf;
+  int buf_offset;
+  uint32_t state;
+};
+
+struct AnsDecoder {
+  const uint8_t *buf;
+  int buf_offset;
+  uint32_t state;
+};
+
+typedef uint8_t AnsP8;
+#define ans_p8_precision 256u
+#define ans_p8_shift 8
+typedef uint16_t AnsP10;
+#define ans_p10_precision 1024u
+
+#define rans_precision ans_p10_precision
+
+#define l_base (ans_p10_precision * 4)  // l_base % precision must be 0
+#define io_base 256
+// Range I = { l_base, l_base + 1, ..., l_base * io_base - 1 }
+
+static INLINE void ans_write_init(struct AnsCoder *const ans,
+                                  uint8_t *const buf) {
+  ans->buf = buf;
+  ans->buf_offset = 0;
+  ans->state = l_base;
+}
+
+static INLINE int ans_write_end(struct AnsCoder *const ans) {
+  uint32_t state;
+  assert(ans->state >= l_base);
+  assert(ans->state < l_base * io_base);
+  state = ans->state - l_base;
+  if (state < (1 << 6)) {
+    ans->buf[ans->buf_offset] = (0x00 << 6) + state;
+    return ans->buf_offset + 1;
+  } else if (state < (1 << 14)) {
+    mem_put_le16(ans->buf + ans->buf_offset, (0x01 << 14) + state);
+    return ans->buf_offset + 2;
+  } else if (state < (1 << 22)) {
+    mem_put_le24(ans->buf + ans->buf_offset, (0x02 << 22) + state);
+    return ans->buf_offset + 3;
+  } else {
+    assert(0 && "State is too large to be serialized");
+    return ans->buf_offset;
+  }
+}
+
+// rABS with descending spread
+// p or p0 takes the place of l_s from the paper
+// ans_p8_precision is m
+static INLINE void rabs_desc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+  const AnsP8 p = ans_p8_precision - p0;
+  const unsigned l_s = val ? p : p0;
+  unsigned quot, rem;
+  if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+    ans->buf[ans->buf_offset++] = ans->state % io_base;
+    ans->state /= io_base;
+  }
+  ANS_DIVREM(quot, rem, ans->state, l_s);
+  ans->state = quot * ans_p8_precision + rem + (val ? 0 : p);
+}
+
+#define ANS_IMPL1 0
+#define UNPREDICTABLE(x) x
+static INLINE int rabs_desc_read(struct AnsDecoder *ans, AnsP8 p0) {
+  int val;
+#if ANS_IMPL1
+  unsigned l_s;
+#else
+  unsigned quot, rem, x, xn;
+#endif
+  const AnsP8 p = ans_p8_precision - p0;
+  if (ans->state < l_base) {
+    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+  }
+#if ANS_IMPL1
+  val = ans->state % ans_p8_precision < p;
+  l_s = val ? p : p0;
+  ans->state = (ans->state / ans_p8_precision) * l_s +
+               ans->state % ans_p8_precision - (!val * p);
+#else
+  x = ans->state;
+  quot = x / ans_p8_precision;
+  rem = x % ans_p8_precision;
+  xn = quot * p;
+  val = rem < p;
+  if (UNPREDICTABLE(val)) {
+    ans->state = xn + rem;
+  } else {
+    // ans->state = quot * p0 + rem - p;
+    ans->state = x - xn - p;
+  }
+#endif
+  return val;
+}
+
+// rABS with ascending spread
+// p or p0 takes the place of l_s from the paper
+// ans_p8_precision is m
+static INLINE void rabs_asc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+  const AnsP8 p = ans_p8_precision - p0;
+  const unsigned l_s = val ? p : p0;
+  unsigned quot, rem;
+  if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+    ans->buf[ans->buf_offset++] = ans->state % io_base;
+    ans->state /= io_base;
+  }
+  ANS_DIVREM(quot, rem, ans->state, l_s);
+  ans->state = quot * ans_p8_precision + rem + (val ? p0 : 0);
+}
+
+static INLINE int rabs_asc_read(struct AnsDecoder *ans, AnsP8 p0) {
+  int val;
+#if ANS_IMPL1
+  unsigned l_s;
+#else
+  unsigned quot, rem, x, xn;
+#endif
+  const AnsP8 p = ans_p8_precision - p0;
+  if (ans->state < l_base) {
+    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+  }
+#if ANS_IMPL1
+  val = ans->state % ans_p8_precision < p;
+  l_s = val ? p : p0;
+  ans->state = (ans->state / ans_p8_precision) * l_s +
+               ans->state % ans_p8_precision - (!val * p);
+#else
+  x = ans->state;
+  quot = x / ans_p8_precision;
+  rem = x % ans_p8_precision;
+  xn = quot * p;
+  val = rem >= p0;
+  if (UNPREDICTABLE(val)) {
+    ans->state = xn + rem - p0;
+  } else {
+    // ans->state = quot * p0 + rem;
+    ans->state = x - xn;
+  }
+#endif
+  return val;
+}
+
+#define rabs_read rabs_desc_read
+#define rabs_write rabs_desc_write
+
+// uABS with normalization
+static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+  AnsP8 p = ans_p8_precision - p0;
+  const unsigned l_s = val ? p : p0;
+  while (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+    ans->buf[ans->buf_offset++] = ans->state % io_base;
+    ans->state /= io_base;
+  }
+  if (!val)
+    ans->state = ANS_DIV(ans->state * ans_p8_precision, p0);
+  else
+    ans->state = ANS_DIV((ans->state + 1) * ans_p8_precision + p - 1, p) - 1;
+}
+
+static INLINE int uabs_read(struct AnsDecoder *ans, AnsP8 p0) {
+  AnsP8 p = ans_p8_precision - p0;
+  int s;
+  // unsigned int xp1;
+  unsigned xp, sp;
+  unsigned state = ans->state;
+  while (state < l_base && ans->buf_offset > 0) {
+    state = state * io_base + ans->buf[--ans->buf_offset];
+  }
+  sp = state * p;
+  // xp1 = (sp + p) / ans_p8_precision;
+  xp = sp / ans_p8_precision;
+  // s = xp1 - xp;
+  s = (sp & 0xFF) >= p0;
+  if (UNPREDICTABLE(s))
+    ans->state = xp;
+  else
+    ans->state = state - xp;
+  return s;
+}
+
+static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
+  int s;
+  unsigned state = ans->state;
+  while (state < l_base && ans->buf_offset > 0) {
+    state = state * io_base + ans->buf[--ans->buf_offset];
+  }
+  s = (int)(state & 1);
+  ans->state = state >> 1;
+  return s;
+}
+
+static INLINE int uabs_read_literal(struct AnsDecoder *ans, int bits) {
+  int literal = 0, bit;
+  assert(bits < 31);
+
+  // TODO(aconverse): Investigate ways to read/write literals faster,
+  // e.g. 8-bit chunks.
+  for (bit = bits - 1; bit >= 0; bit--) literal |= uabs_read_bit(ans) << bit;
+
+  return literal;
+}
+
+// TODO(aconverse): Replace trees with tokensets.
+static INLINE int uabs_read_tree(struct AnsDecoder *ans,
+                                 const vpx_tree_index *tree,
+                                 const AnsP8 *probs) {
+  vpx_tree_index i = 0;
+
+  while ((i = tree[i + uabs_read(ans, probs[i >> 1])]) > 0) continue;
+
+  return -i;
+}
+
+struct rans_sym {
+  AnsP10 prob;
+  AnsP10 cum_prob;  // sum of probs of preceding symbols (exclusive)
+};
+
+struct rans_dec_sym {
+  uint8_t val;
+  AnsP10 prob;
+  AnsP10 cum_prob;  // sum of probs of preceding symbols (exclusive)
+};
+
+// This is now just a boring cdf. It starts with an explicit zero.
+// TODO(aconverse): Remove starting zero.
+typedef uint16_t rans_dec_lut[16];
+
+static INLINE void rans_build_cdf_from_pdf(const AnsP10 token_probs[],
+                                           rans_dec_lut cdf_tab) {
+  int i;
+  cdf_tab[0] = 0;
+  for (i = 1; cdf_tab[i - 1] < rans_precision; ++i) {
+    cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1];
+  }
+  assert(cdf_tab[i - 1] == rans_precision);
+}
+
+static INLINE int ans_find_largest(const AnsP10 *const pdf_tab, int num_syms) {
+  int largest_idx = -1;
+  int largest_p = -1;
+  int i;
+  for (i = 0; i < num_syms; ++i) {
+    int p = pdf_tab[i];
+    if (p > largest_p) {
+      largest_p = p;
+      largest_idx = i;
+    }
+  }
+  return largest_idx;
+}
+
+static INLINE void rans_merge_prob8_pdf(AnsP10 *const out_pdf,
+                                        const AnsP8 node_prob,
+                                        const AnsP10 *const src_pdf,
+                                        int in_syms) {
+  int i;
+  int adjustment = rans_precision;
+  const int round_fact = ans_p8_precision >> 1;
+  const AnsP8 p1 = ans_p8_precision - node_prob;
+  const int out_syms = in_syms + 1;
+  assert(src_pdf != out_pdf);
+
+  out_pdf[0] = node_prob << (10 - 8);
+  adjustment -= out_pdf[0];
+  for (i = 0; i < in_syms; ++i) {
+    int p = (p1 * src_pdf[i] + round_fact) >> ans_p8_shift;
+    p = VPXMIN(p, (int)rans_precision - in_syms);
+    p = VPXMAX(p, 1);
+    out_pdf[i + 1] = p;
+    adjustment -= p;
+  }
+
+  // Adjust probabilities so they sum to the total probability
+  if (adjustment > 0) {
+    i = ans_find_largest(out_pdf, out_syms);
+    out_pdf[i] += adjustment;
+  } else {
+    while (adjustment < 0) {
+      i = ans_find_largest(out_pdf, out_syms);
+      --out_pdf[i];
+      assert(out_pdf[i] > 0);
+      adjustment++;
+    }
+  }
+}
+
+// rANS with normalization
+// sym->prob takes the place of l_s from the paper
+// ans_p10_precision is m
+static INLINE void rans_write(struct AnsCoder *ans,
+                              const struct rans_sym *const sym) {
+  const AnsP10 p = sym->prob;
+  while (ans->state >= l_base / rans_precision * io_base * p) {
+    ans->buf[ans->buf_offset++] = ans->state % io_base;
+    ans->state /= io_base;
+  }
+  ans->state =
+      (ans->state / p) * rans_precision + ans->state % p + sym->cum_prob;
+}
+
+static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf,
+                             AnsP10 rem) {
+  int i = 0;
+  // TODO(skal): if critical, could be a binary search.
+  // Or, better, an O(1) alias-table.
+  while (rem >= cdf[i]) {
+    ++i;
+  }
+  out->val = i - 1;
+  out->prob = (AnsP10)(cdf[i] - cdf[i - 1]);
+  out->cum_prob = (AnsP10)cdf[i - 1];
+}
+
+static INLINE int rans_read(struct AnsDecoder *ans, const rans_dec_lut tab) {
+  unsigned rem;
+  unsigned quo;
+  struct rans_dec_sym sym;
+  while (ans->state < l_base && ans->buf_offset > 0) {
+    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+  }
+  quo = ans->state / rans_precision;
+  rem = ans->state % rans_precision;
+  fetch_sym(&sym, tab, rem);
+  ans->state = quo * sym.prob + rem - sym.cum_prob;
+  return sym.val;
+}
+
+static INLINE int ans_read_init(struct AnsDecoder *const ans,
+                                const uint8_t *const buf, int offset) {
+  unsigned x;
+  if (offset < 1) return 1;
+  ans->buf = buf;
+  x = buf[offset - 1] >> 6;
+  if (x == 0) {
+    ans->buf_offset = offset - 1;
+    ans->state = buf[offset - 1] & 0x3F;
+  } else if (x == 1) {
+    if (offset < 2) return 1;
+    ans->buf_offset = offset - 2;
+    ans->state = mem_get_le16(buf + offset - 2) & 0x3FFF;
+  } else if (x == 2) {
+    if (offset < 3) return 1;
+    ans->buf_offset = offset - 3;
+    ans->state = mem_get_le24(buf + offset - 3) & 0x3FFFFF;
+  } else {
+    // x == 3 implies this byte is a superframe marker
+    return 1;
+  }
+  ans->state += l_base;
+  if (ans->state >= l_base * io_base) return 1;
+  return 0;
+}
+
+static INLINE int ans_read_end(struct AnsDecoder *const ans) {
+  return ans->state == l_base;
+}
+
+static INLINE int ans_reader_has_error(const struct AnsDecoder *const ans) {
+  return ans->state < l_base && ans->buf_offset == 0;
+}
+#undef ANS_DIVREM
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // VP10_COMMON_ANS_H_
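
Note: the uABS coder above can be exercised on its own. Below is a minimal
round-trip sketch (a hypothetical driver, not part of this change); it
assumes the fastdiv() support from av1/common/divide.c is linked in, or that
ANS_DIVIDE_BY_MULTIPLY is set to 0. ANS is stack-like, so symbols decode in
the reverse of encode order; writing the bit string back-to-front lets the
decoder read it front-to-back:

#include <assert.h>
#include <stdio.h>
#include "av1/common/ans.h"

int main(void) {
  const int bits[8] = { 1, 0, 0, 1, 1, 1, 0, 1 };
  const AnsP8 p0 = 160; /* probability of a zero bit, in 1/256 units */
  uint8_t buf[64];
  struct AnsCoder enc;
  struct AnsDecoder dec;
  int i, sz;

  ans_write_init(&enc, buf);
  for (i = 7; i >= 0; --i) uabs_write(&enc, bits[i], p0); /* reverse order */
  sz = ans_write_end(&enc); /* serialized size in bytes */

  assert(ans_read_init(&dec, buf, sz) == 0);
  for (i = 0; i < 8; ++i) assert(uabs_read(&dec, p0) == bits[i]);
  assert(ans_read_end(&dec)); /* state should be back at l_base */
  printf("round trip OK, %d bytes\n", sz);
  return 0;
}
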
diff --git a/av1/common/arm/neon/iht4x4_add_neon.c b/av1/common/arm/neon/iht4x4_add_neon.c
new file mode 100644
index 0000000..600e66b
--- /dev/null
+++ b/av1/common/arm/neon/iht4x4_add_neon.c
@@ -0,0 +1,233 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "av1/common/common.h"
+
+static int16_t sinpi_1_9 = 0x14a3;
+static int16_t sinpi_2_9 = 0x26c9;
+static int16_t sinpi_3_9 = 0x3441;
+static int16_t sinpi_4_9 = 0x3b6c;
+static int16_t cospi_8_64 = 0x3b21;
+static int16_t cospi_16_64 = 0x2d41;
+static int16_t cospi_24_64 = 0x187e;
+
+static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
+  int32x4_t q8s32, q9s32;
+  int16x4x2_t d0x2s16, d1x2s16;
+  int32x4x2_t q0x2s32;
+
+  d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16));
+  d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16));
+
+  q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]));
+  q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]));
+  q0x2s32 = vtrnq_s32(q8s32, q9s32);
+
+  *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]);
+  *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]);
+  return;
+}
+
+static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
+                                             int16x4_t *d2s16) {
+  *d0s16 = vdup_n_s16(cospi_8_64);
+  *d1s16 = vdup_n_s16(cospi_16_64);
+  *d2s16 = vdup_n_s16(cospi_24_64);
+  return;
+}
+
+static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
+                                           int16x4_t *d5s16, int16x8_t *q3s16) {
+  *d3s16 = vdup_n_s16(sinpi_1_9);
+  *d4s16 = vdup_n_s16(sinpi_2_9);
+  *q3s16 = vdupq_n_s16(sinpi_3_9);
+  *d5s16 = vdup_n_s16(sinpi_4_9);
+  return;
+}
+
+static INLINE void IDCT4x4_1D(int16x4_t *d0s16, int16x4_t *d1s16,
+                              int16x4_t *d2s16, int16x8_t *q8s16,
+                              int16x8_t *q9s16) {
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16;
+  int16x4_t d26s16, d27s16, d28s16, d29s16;
+  int32x4_t q10s32, q13s32, q14s32, q15s32;
+  int16x8_t q13s16, q14s16;
+
+  d16s16 = vget_low_s16(*q8s16);
+  d17s16 = vget_high_s16(*q8s16);
+  d18s16 = vget_low_s16(*q9s16);
+  d19s16 = vget_high_s16(*q9s16);
+
+  d23s16 = vadd_s16(d16s16, d18s16);
+  d24s16 = vsub_s16(d16s16, d18s16);
+
+  q15s32 = vmull_s16(d17s16, *d2s16);
+  q10s32 = vmull_s16(d17s16, *d0s16);
+  q13s32 = vmull_s16(d23s16, *d1s16);
+  q14s32 = vmull_s16(d24s16, *d1s16);
+  q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16);
+  q10s32 = vmlal_s16(q10s32, d19s16, *d2s16);
+
+  d26s16 = vqrshrn_n_s32(q13s32, 14);
+  d27s16 = vqrshrn_n_s32(q14s32, 14);
+  d29s16 = vqrshrn_n_s32(q15s32, 14);
+  d28s16 = vqrshrn_n_s32(q10s32, 14);
+
+  q13s16 = vcombine_s16(d26s16, d27s16);
+  q14s16 = vcombine_s16(d28s16, d29s16);
+  *q8s16 = vaddq_s16(q13s16, q14s16);
+  *q9s16 = vsubq_s16(q13s16, q14s16);
+  *q9s16 = vcombine_s16(vget_high_s16(*q9s16), vget_low_s16(*q9s16));  // vswp
+  return;
+}
+
+static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16,
+                               int16x4_t *d5s16, int16x8_t *q3s16,
+                               int16x8_t *q8s16, int16x8_t *q9s16) {
+  int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16;
+  int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
+
+  d6s16 = vget_low_s16(*q3s16);
+
+  d16s16 = vget_low_s16(*q8s16);
+  d17s16 = vget_high_s16(*q8s16);
+  d18s16 = vget_low_s16(*q9s16);
+  d19s16 = vget_high_s16(*q9s16);
+
+  q10s32 = vmull_s16(*d3s16, d16s16);
+  q11s32 = vmull_s16(*d4s16, d16s16);
+  q12s32 = vmull_s16(d6s16, d17s16);
+  q13s32 = vmull_s16(*d5s16, d18s16);
+  q14s32 = vmull_s16(*d3s16, d18s16);
+  q15s32 = vmovl_s16(d16s16);
+  q15s32 = vaddw_s16(q15s32, d19s16);
+  q8s32 = vmull_s16(*d4s16, d19s16);
+  q15s32 = vsubw_s16(q15s32, d18s16);
+  q9s32 = vmull_s16(*d5s16, d19s16);
+
+  q10s32 = vaddq_s32(q10s32, q13s32);
+  q10s32 = vaddq_s32(q10s32, q8s32);
+  q11s32 = vsubq_s32(q11s32, q14s32);
+  q8s32 = vdupq_n_s32(sinpi_3_9);
+  q11s32 = vsubq_s32(q11s32, q9s32);
+  q15s32 = vmulq_s32(q15s32, q8s32);
+
+  q13s32 = vaddq_s32(q10s32, q12s32);
+  q10s32 = vaddq_s32(q10s32, q11s32);
+  q14s32 = vaddq_s32(q11s32, q12s32);
+  q10s32 = vsubq_s32(q10s32, q12s32);
+
+  d16s16 = vqrshrn_n_s32(q13s32, 14);
+  d17s16 = vqrshrn_n_s32(q14s32, 14);
+  d18s16 = vqrshrn_n_s32(q15s32, 14);
+  d19s16 = vqrshrn_n_s32(q10s32, 14);
+
+  *q8s16 = vcombine_s16(d16s16, d17s16);
+  *q9s16 = vcombine_s16(d18s16, d19s16);
+  return;
+}
+
+void vp10_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
+                             int dest_stride, int tx_type) {
+  uint8x8_t d26u8, d27u8;
+  int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16;
+  uint32x2_t d26u32, d27u32;
+  int16x8_t q3s16, q8s16, q9s16;
+  uint16x8_t q8u16, q9u16;
+
+  d26u32 = d27u32 = vdup_n_u32(0);
+
+  q8s16 = vld1q_s16(input);
+  q9s16 = vld1q_s16(input + 8);
+
+  TRANSPOSE4X4(&q8s16, &q9s16);
+
+  switch (tx_type) {
+    case 0:  // idct_idct is not supported. Fall back to C
+      vp10_iht4x4_16_add_c(input, dest, dest_stride, tx_type);
+      return;
+      break;
+    case 1:  // iadst_idct
+      // generate constants
+      GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
+      GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
+
+      // first transform rows
+      IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
+
+      // transpose the matrix
+      TRANSPOSE4X4(&q8s16, &q9s16);
+
+      // then transform columns
+      IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+      break;
+    case 2:  // idct_iadst
+      // generate constants
+      GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
+      GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
+
+      // first transform rows
+      IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+
+      // transpose the matrix
+      TRANSPOSE4X4(&q8s16, &q9s16);
+
+      // then transform columns
+      IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
+      break;
+    case 3:  // iadst_iadst
+      // generate constants
+      GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
+
+      // first transform rows
+      IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+
+      // transpose the matrix
+      TRANSPOSE4X4(&q8s16, &q9s16);
+
+      // then transform columns
+      IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+      break;
+    default:  // invalid tx_type
+      assert(0);
+      break;
+  }
+
+  q8s16 = vrshrq_n_s16(q8s16, 4);
+  q9s16 = vrshrq_n_s16(q9s16, 4);
+
+  d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0);
+  dest += dest_stride;
+  d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1);
+  dest += dest_stride;
+  d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0);
+  dest += dest_stride;
+  d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1);
+
+  q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32));
+  q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32));
+
+  d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
+  d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
+
+  vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1);
+  dest -= dest_stride;
+  vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0);
+  dest -= dest_stride;
+  vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1);
+  dest -= dest_stride;
+  vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0);
+  return;
+}
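
Note: the tail of vp10_iht4x4_16_add_neon() above (vrshrq_n_s16(., 4),
widening add of the predictor rows, saturating narrow) is the usual
reconstruction step. A hypothetical scalar equivalent for 8-bit pixels:

#include <stdint.h>

static uint8_t clip_pixel(int val) {
  return (uint8_t)(val < 0 ? 0 : (val > 255 ? 255 : val));
}

static void recon_4x4(const int16_t *residual, uint8_t *dest, int stride) {
  int r, c;
  for (r = 0; r < 4; ++r)
    for (c = 0; c < 4; ++c)
      /* vrshrq_n_s16(x, 4) is the rounding shift (x + 8) >> 4;
       * vqmovun_s16 saturates the sum to [0, 255]. */
      dest[r * stride + c] = clip_pixel(dest[r * stride + c] +
                                        ((residual[r * 4 + c] + 8) >> 4));
}
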
diff --git a/av1/common/arm/neon/iht8x8_add_neon.c b/av1/common/arm/neon/iht8x8_add_neon.c
new file mode 100644
index 0000000..ff5578d
--- /dev/null
+++ b/av1/common/arm/neon/iht8x8_add_neon.c
@@ -0,0 +1,607 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "av1/common/common.h"
+
+static int16_t cospi_2_64 = 16305;
+static int16_t cospi_4_64 = 16069;
+static int16_t cospi_6_64 = 15679;
+static int16_t cospi_8_64 = 15137;
+static int16_t cospi_10_64 = 14449;
+static int16_t cospi_12_64 = 13623;
+static int16_t cospi_14_64 = 12665;
+static int16_t cospi_16_64 = 11585;
+static int16_t cospi_18_64 = 10394;
+static int16_t cospi_20_64 = 9102;
+static int16_t cospi_22_64 = 7723;
+static int16_t cospi_24_64 = 6270;
+static int16_t cospi_26_64 = 4756;
+static int16_t cospi_28_64 = 3196;
+static int16_t cospi_30_64 = 1606;
+
+static INLINE void TRANSPOSE8X8(int16x8_t *q8s16, int16x8_t *q9s16,
+                                int16x8_t *q10s16, int16x8_t *q11s16,
+                                int16x8_t *q12s16, int16x8_t *q13s16,
+                                int16x8_t *q14s16, int16x8_t *q15s16) {
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
+  int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
+  int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32;
+  int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16;
+
+  d16s16 = vget_low_s16(*q8s16);
+  d17s16 = vget_high_s16(*q8s16);
+  d18s16 = vget_low_s16(*q9s16);
+  d19s16 = vget_high_s16(*q9s16);
+  d20s16 = vget_low_s16(*q10s16);
+  d21s16 = vget_high_s16(*q10s16);
+  d22s16 = vget_low_s16(*q11s16);
+  d23s16 = vget_high_s16(*q11s16);
+  d24s16 = vget_low_s16(*q12s16);
+  d25s16 = vget_high_s16(*q12s16);
+  d26s16 = vget_low_s16(*q13s16);
+  d27s16 = vget_high_s16(*q13s16);
+  d28s16 = vget_low_s16(*q14s16);
+  d29s16 = vget_high_s16(*q14s16);
+  d30s16 = vget_low_s16(*q15s16);
+  d31s16 = vget_high_s16(*q15s16);
+
+  *q8s16 = vcombine_s16(d16s16, d24s16);   // vswp d17, d24
+  *q9s16 = vcombine_s16(d18s16, d26s16);   // vswp d19, d26
+  *q10s16 = vcombine_s16(d20s16, d28s16);  // vswp d21, d28
+  *q11s16 = vcombine_s16(d22s16, d30s16);  // vswp d23, d30
+  *q12s16 = vcombine_s16(d17s16, d25s16);
+  *q13s16 = vcombine_s16(d19s16, d27s16);
+  *q14s16 = vcombine_s16(d21s16, d29s16);
+  *q15s16 = vcombine_s16(d23s16, d31s16);
+
+  q0x2s32 =
+      vtrnq_s32(vreinterpretq_s32_s16(*q8s16), vreinterpretq_s32_s16(*q10s16));
+  q1x2s32 =
+      vtrnq_s32(vreinterpretq_s32_s16(*q9s16), vreinterpretq_s32_s16(*q11s16));
+  q2x2s32 =
+      vtrnq_s32(vreinterpretq_s32_s16(*q12s16), vreinterpretq_s32_s16(*q14s16));
+  q3x2s32 =
+      vtrnq_s32(vreinterpretq_s32_s16(*q13s16), vreinterpretq_s32_s16(*q15s16));
+
+  q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]),   // q8
+                      vreinterpretq_s16_s32(q1x2s32.val[0]));  // q9
+  q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]),   // q10
+                      vreinterpretq_s16_s32(q1x2s32.val[1]));  // q11
+  q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]),   // q12
+                      vreinterpretq_s16_s32(q3x2s32.val[0]));  // q13
+  q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]),   // q14
+                      vreinterpretq_s16_s32(q3x2s32.val[1]));  // q15
+
+  *q8s16 = q0x2s16.val[0];
+  *q9s16 = q0x2s16.val[1];
+  *q10s16 = q1x2s16.val[0];
+  *q11s16 = q1x2s16.val[1];
+  *q12s16 = q2x2s16.val[0];
+  *q13s16 = q2x2s16.val[1];
+  *q14s16 = q3x2s16.val[0];
+  *q15s16 = q3x2s16.val[1];
+  return;
+}
+
+static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
+                              int16x8_t *q10s16, int16x8_t *q11s16,
+                              int16x8_t *q12s16, int16x8_t *q13s16,
+                              int16x8_t *q14s16, int16x8_t *q15s16) {
+  int16x4_t d0s16, d1s16, d2s16, d3s16;
+  int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
+  int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
+  int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
+  int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
+  int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
+
+  d0s16 = vdup_n_s16(cospi_28_64);
+  d1s16 = vdup_n_s16(cospi_4_64);
+  d2s16 = vdup_n_s16(cospi_12_64);
+  d3s16 = vdup_n_s16(cospi_20_64);
+
+  d16s16 = vget_low_s16(*q8s16);
+  d17s16 = vget_high_s16(*q8s16);
+  d18s16 = vget_low_s16(*q9s16);
+  d19s16 = vget_high_s16(*q9s16);
+  d20s16 = vget_low_s16(*q10s16);
+  d21s16 = vget_high_s16(*q10s16);
+  d22s16 = vget_low_s16(*q11s16);
+  d23s16 = vget_high_s16(*q11s16);
+  d24s16 = vget_low_s16(*q12s16);
+  d25s16 = vget_high_s16(*q12s16);
+  d26s16 = vget_low_s16(*q13s16);
+  d27s16 = vget_high_s16(*q13s16);
+  d28s16 = vget_low_s16(*q14s16);
+  d29s16 = vget_high_s16(*q14s16);
+  d30s16 = vget_low_s16(*q15s16);
+  d31s16 = vget_high_s16(*q15s16);
+
+  q2s32 = vmull_s16(d18s16, d0s16);
+  q3s32 = vmull_s16(d19s16, d0s16);
+  q5s32 = vmull_s16(d26s16, d2s16);
+  q6s32 = vmull_s16(d27s16, d2s16);
+
+  q2s32 = vmlsl_s16(q2s32, d30s16, d1s16);
+  q3s32 = vmlsl_s16(q3s32, d31s16, d1s16);
+  q5s32 = vmlsl_s16(q5s32, d22s16, d3s16);
+  q6s32 = vmlsl_s16(q6s32, d23s16, d3s16);
+
+  d8s16 = vqrshrn_n_s32(q2s32, 14);
+  d9s16 = vqrshrn_n_s32(q3s32, 14);
+  d10s16 = vqrshrn_n_s32(q5s32, 14);
+  d11s16 = vqrshrn_n_s32(q6s32, 14);
+  q4s16 = vcombine_s16(d8s16, d9s16);
+  q5s16 = vcombine_s16(d10s16, d11s16);
+
+  q2s32 = vmull_s16(d18s16, d1s16);
+  q3s32 = vmull_s16(d19s16, d1s16);
+  q9s32 = vmull_s16(d26s16, d3s16);
+  q13s32 = vmull_s16(d27s16, d3s16);
+
+  q2s32 = vmlal_s16(q2s32, d30s16, d0s16);
+  q3s32 = vmlal_s16(q3s32, d31s16, d0s16);
+  q9s32 = vmlal_s16(q9s32, d22s16, d2s16);
+  q13s32 = vmlal_s16(q13s32, d23s16, d2s16);
+
+  d14s16 = vqrshrn_n_s32(q2s32, 14);
+  d15s16 = vqrshrn_n_s32(q3s32, 14);
+  d12s16 = vqrshrn_n_s32(q9s32, 14);
+  d13s16 = vqrshrn_n_s32(q13s32, 14);
+  q6s16 = vcombine_s16(d12s16, d13s16);
+  q7s16 = vcombine_s16(d14s16, d15s16);
+
+  d0s16 = vdup_n_s16(cospi_16_64);
+
+  q2s32 = vmull_s16(d16s16, d0s16);
+  q3s32 = vmull_s16(d17s16, d0s16);
+  q13s32 = vmull_s16(d16s16, d0s16);
+  q15s32 = vmull_s16(d17s16, d0s16);
+
+  q2s32 = vmlal_s16(q2s32, d24s16, d0s16);
+  q3s32 = vmlal_s16(q3s32, d25s16, d0s16);
+  q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
+  q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
+
+  d0s16 = vdup_n_s16(cospi_24_64);
+  d1s16 = vdup_n_s16(cospi_8_64);
+
+  d18s16 = vqrshrn_n_s32(q2s32, 14);
+  d19s16 = vqrshrn_n_s32(q3s32, 14);
+  d22s16 = vqrshrn_n_s32(q13s32, 14);
+  d23s16 = vqrshrn_n_s32(q15s32, 14);
+  *q9s16 = vcombine_s16(d18s16, d19s16);
+  *q11s16 = vcombine_s16(d22s16, d23s16);
+
+  q2s32 = vmull_s16(d20s16, d0s16);
+  q3s32 = vmull_s16(d21s16, d0s16);
+  q8s32 = vmull_s16(d20s16, d1s16);
+  q12s32 = vmull_s16(d21s16, d1s16);
+
+  q2s32 = vmlsl_s16(q2s32, d28s16, d1s16);
+  q3s32 = vmlsl_s16(q3s32, d29s16, d1s16);
+  q8s32 = vmlal_s16(q8s32, d28s16, d0s16);
+  q12s32 = vmlal_s16(q12s32, d29s16, d0s16);
+
+  d26s16 = vqrshrn_n_s32(q2s32, 14);
+  d27s16 = vqrshrn_n_s32(q3s32, 14);
+  d30s16 = vqrshrn_n_s32(q8s32, 14);
+  d31s16 = vqrshrn_n_s32(q12s32, 14);
+  *q13s16 = vcombine_s16(d26s16, d27s16);
+  *q15s16 = vcombine_s16(d30s16, d31s16);
+
+  q0s16 = vaddq_s16(*q9s16, *q15s16);
+  q1s16 = vaddq_s16(*q11s16, *q13s16);
+  q2s16 = vsubq_s16(*q11s16, *q13s16);
+  q3s16 = vsubq_s16(*q9s16, *q15s16);
+
+  *q13s16 = vsubq_s16(q4s16, q5s16);
+  q4s16 = vaddq_s16(q4s16, q5s16);
+  *q14s16 = vsubq_s16(q7s16, q6s16);
+  q7s16 = vaddq_s16(q7s16, q6s16);
+  d26s16 = vget_low_s16(*q13s16);
+  d27s16 = vget_high_s16(*q13s16);
+  d28s16 = vget_low_s16(*q14s16);
+  d29s16 = vget_high_s16(*q14s16);
+
+  d16s16 = vdup_n_s16(cospi_16_64);
+
+  q9s32 = vmull_s16(d28s16, d16s16);
+  q10s32 = vmull_s16(d29s16, d16s16);
+  q11s32 = vmull_s16(d28s16, d16s16);
+  q12s32 = vmull_s16(d29s16, d16s16);
+
+  q9s32 = vmlsl_s16(q9s32, d26s16, d16s16);
+  q10s32 = vmlsl_s16(q10s32, d27s16, d16s16);
+  q11s32 = vmlal_s16(q11s32, d26s16, d16s16);
+  q12s32 = vmlal_s16(q12s32, d27s16, d16s16);
+
+  d10s16 = vqrshrn_n_s32(q9s32, 14);
+  d11s16 = vqrshrn_n_s32(q10s32, 14);
+  d12s16 = vqrshrn_n_s32(q11s32, 14);
+  d13s16 = vqrshrn_n_s32(q12s32, 14);
+  q5s16 = vcombine_s16(d10s16, d11s16);
+  q6s16 = vcombine_s16(d12s16, d13s16);
+
+  *q8s16 = vaddq_s16(q0s16, q7s16);
+  *q9s16 = vaddq_s16(q1s16, q6s16);
+  *q10s16 = vaddq_s16(q2s16, q5s16);
+  *q11s16 = vaddq_s16(q3s16, q4s16);
+  *q12s16 = vsubq_s16(q3s16, q4s16);
+  *q13s16 = vsubq_s16(q2s16, q5s16);
+  *q14s16 = vsubq_s16(q1s16, q6s16);
+  *q15s16 = vsubq_s16(q0s16, q7s16);
+  return;
+}
+
+static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
+                               int16x8_t *q10s16, int16x8_t *q11s16,
+                               int16x8_t *q12s16, int16x8_t *q13s16,
+                               int16x8_t *q14s16, int16x8_t *q15s16) {
+  int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16;
+  int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
+  int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
+  int16x8_t q2s16, q4s16, q5s16, q6s16;
+  int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32;
+  int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
+
+  d16s16 = vget_low_s16(*q8s16);
+  d17s16 = vget_high_s16(*q8s16);
+  d18s16 = vget_low_s16(*q9s16);
+  d19s16 = vget_high_s16(*q9s16);
+  d20s16 = vget_low_s16(*q10s16);
+  d21s16 = vget_high_s16(*q10s16);
+  d22s16 = vget_low_s16(*q11s16);
+  d23s16 = vget_high_s16(*q11s16);
+  d24s16 = vget_low_s16(*q12s16);
+  d25s16 = vget_high_s16(*q12s16);
+  d26s16 = vget_low_s16(*q13s16);
+  d27s16 = vget_high_s16(*q13s16);
+  d28s16 = vget_low_s16(*q14s16);
+  d29s16 = vget_high_s16(*q14s16);
+  d30s16 = vget_low_s16(*q15s16);
+  d31s16 = vget_high_s16(*q15s16);
+
+  d14s16 = vdup_n_s16(cospi_2_64);
+  d15s16 = vdup_n_s16(cospi_30_64);
+
+  q1s32 = vmull_s16(d30s16, d14s16);
+  q2s32 = vmull_s16(d31s16, d14s16);
+  q3s32 = vmull_s16(d30s16, d15s16);
+  q4s32 = vmull_s16(d31s16, d15s16);
+
+  d30s16 = vdup_n_s16(cospi_18_64);
+  d31s16 = vdup_n_s16(cospi_14_64);
+
+  q1s32 = vmlal_s16(q1s32, d16s16, d15s16);
+  q2s32 = vmlal_s16(q2s32, d17s16, d15s16);
+  q3s32 = vmlsl_s16(q3s32, d16s16, d14s16);
+  q4s32 = vmlsl_s16(q4s32, d17s16, d14s16);
+
+  q5s32 = vmull_s16(d22s16, d30s16);
+  q6s32 = vmull_s16(d23s16, d30s16);
+  q7s32 = vmull_s16(d22s16, d31s16);
+  q8s32 = vmull_s16(d23s16, d31s16);
+
+  q5s32 = vmlal_s16(q5s32, d24s16, d31s16);
+  q6s32 = vmlal_s16(q6s32, d25s16, d31s16);
+  q7s32 = vmlsl_s16(q7s32, d24s16, d30s16);
+  q8s32 = vmlsl_s16(q8s32, d25s16, d30s16);
+
+  q11s32 = vaddq_s32(q1s32, q5s32);
+  q12s32 = vaddq_s32(q2s32, q6s32);
+  q1s32 = vsubq_s32(q1s32, q5s32);
+  q2s32 = vsubq_s32(q2s32, q6s32);
+
+  d22s16 = vqrshrn_n_s32(q11s32, 14);
+  d23s16 = vqrshrn_n_s32(q12s32, 14);
+  *q11s16 = vcombine_s16(d22s16, d23s16);
+
+  q12s32 = vaddq_s32(q3s32, q7s32);
+  q15s32 = vaddq_s32(q4s32, q8s32);
+  q3s32 = vsubq_s32(q3s32, q7s32);
+  q4s32 = vsubq_s32(q4s32, q8s32);
+
+  d2s16 = vqrshrn_n_s32(q1s32, 14);
+  d3s16 = vqrshrn_n_s32(q2s32, 14);
+  d24s16 = vqrshrn_n_s32(q12s32, 14);
+  d25s16 = vqrshrn_n_s32(q15s32, 14);
+  d6s16 = vqrshrn_n_s32(q3s32, 14);
+  d7s16 = vqrshrn_n_s32(q4s32, 14);
+  *q12s16 = vcombine_s16(d24s16, d25s16);
+
+  d0s16 = vdup_n_s16(cospi_10_64);
+  d1s16 = vdup_n_s16(cospi_22_64);
+  q4s32 = vmull_s16(d26s16, d0s16);
+  q5s32 = vmull_s16(d27s16, d0s16);
+  q2s32 = vmull_s16(d26s16, d1s16);
+  q6s32 = vmull_s16(d27s16, d1s16);
+
+  d30s16 = vdup_n_s16(cospi_26_64);
+  d31s16 = vdup_n_s16(cospi_6_64);
+
+  q4s32 = vmlal_s16(q4s32, d20s16, d1s16);
+  q5s32 = vmlal_s16(q5s32, d21s16, d1s16);
+  q2s32 = vmlsl_s16(q2s32, d20s16, d0s16);
+  q6s32 = vmlsl_s16(q6s32, d21s16, d0s16);
+
+  q0s32 = vmull_s16(d18s16, d30s16);
+  q13s32 = vmull_s16(d19s16, d30s16);
+
+  q0s32 = vmlal_s16(q0s32, d28s16, d31s16);
+  q13s32 = vmlal_s16(q13s32, d29s16, d31s16);
+
+  q10s32 = vmull_s16(d18s16, d31s16);
+  q9s32 = vmull_s16(d19s16, d31s16);
+
+  q10s32 = vmlsl_s16(q10s32, d28s16, d30s16);
+  q9s32 = vmlsl_s16(q9s32, d29s16, d30s16);
+
+  q14s32 = vaddq_s32(q2s32, q10s32);
+  q15s32 = vaddq_s32(q6s32, q9s32);
+  q2s32 = vsubq_s32(q2s32, q10s32);
+  q6s32 = vsubq_s32(q6s32, q9s32);
+
+  d28s16 = vqrshrn_n_s32(q14s32, 14);
+  d29s16 = vqrshrn_n_s32(q15s32, 14);
+  d4s16 = vqrshrn_n_s32(q2s32, 14);
+  d5s16 = vqrshrn_n_s32(q6s32, 14);
+  *q14s16 = vcombine_s16(d28s16, d29s16);
+
+  q9s32 = vaddq_s32(q4s32, q0s32);
+  q10s32 = vaddq_s32(q5s32, q13s32);
+  q4s32 = vsubq_s32(q4s32, q0s32);
+  q5s32 = vsubq_s32(q5s32, q13s32);
+
+  d30s16 = vdup_n_s16(cospi_8_64);
+  d31s16 = vdup_n_s16(cospi_24_64);
+
+  d18s16 = vqrshrn_n_s32(q9s32, 14);
+  d19s16 = vqrshrn_n_s32(q10s32, 14);
+  d8s16 = vqrshrn_n_s32(q4s32, 14);
+  d9s16 = vqrshrn_n_s32(q5s32, 14);
+  *q9s16 = vcombine_s16(d18s16, d19s16);
+
+  q5s32 = vmull_s16(d2s16, d30s16);
+  q6s32 = vmull_s16(d3s16, d30s16);
+  q7s32 = vmull_s16(d2s16, d31s16);
+  q0s32 = vmull_s16(d3s16, d31s16);
+
+  q5s32 = vmlal_s16(q5s32, d6s16, d31s16);
+  q6s32 = vmlal_s16(q6s32, d7s16, d31s16);
+  q7s32 = vmlsl_s16(q7s32, d6s16, d30s16);
+  q0s32 = vmlsl_s16(q0s32, d7s16, d30s16);
+
+  q1s32 = vmull_s16(d4s16, d30s16);
+  q3s32 = vmull_s16(d5s16, d30s16);
+  q10s32 = vmull_s16(d4s16, d31s16);
+  q2s32 = vmull_s16(d5s16, d31s16);
+
+  q1s32 = vmlsl_s16(q1s32, d8s16, d31s16);
+  q3s32 = vmlsl_s16(q3s32, d9s16, d31s16);
+  q10s32 = vmlal_s16(q10s32, d8s16, d30s16);
+  q2s32 = vmlal_s16(q2s32, d9s16, d30s16);
+
+  *q8s16 = vaddq_s16(*q11s16, *q9s16);
+  *q11s16 = vsubq_s16(*q11s16, *q9s16);
+  q4s16 = vaddq_s16(*q12s16, *q14s16);
+  *q12s16 = vsubq_s16(*q12s16, *q14s16);
+
+  q14s32 = vaddq_s32(q5s32, q1s32);
+  q15s32 = vaddq_s32(q6s32, q3s32);
+  q5s32 = vsubq_s32(q5s32, q1s32);
+  q6s32 = vsubq_s32(q6s32, q3s32);
+
+  d18s16 = vqrshrn_n_s32(q14s32, 14);
+  d19s16 = vqrshrn_n_s32(q15s32, 14);
+  d10s16 = vqrshrn_n_s32(q5s32, 14);
+  d11s16 = vqrshrn_n_s32(q6s32, 14);
+  *q9s16 = vcombine_s16(d18s16, d19s16);
+
+  q1s32 = vaddq_s32(q7s32, q10s32);
+  q3s32 = vaddq_s32(q0s32, q2s32);
+  q7s32 = vsubq_s32(q7s32, q10s32);
+  q0s32 = vsubq_s32(q0s32, q2s32);
+
+  d28s16 = vqrshrn_n_s32(q1s32, 14);
+  d29s16 = vqrshrn_n_s32(q3s32, 14);
+  d14s16 = vqrshrn_n_s32(q7s32, 14);
+  d15s16 = vqrshrn_n_s32(q0s32, 14);
+  *q14s16 = vcombine_s16(d28s16, d29s16);
+
+  d30s16 = vdup_n_s16(cospi_16_64);
+
+  d22s16 = vget_low_s16(*q11s16);
+  d23s16 = vget_high_s16(*q11s16);
+  q2s32 = vmull_s16(d22s16, d30s16);
+  q3s32 = vmull_s16(d23s16, d30s16);
+  q13s32 = vmull_s16(d22s16, d30s16);
+  q1s32 = vmull_s16(d23s16, d30s16);
+
+  d24s16 = vget_low_s16(*q12s16);
+  d25s16 = vget_high_s16(*q12s16);
+  q2s32 = vmlal_s16(q2s32, d24s16, d30s16);
+  q3s32 = vmlal_s16(q3s32, d25s16, d30s16);
+  q13s32 = vmlsl_s16(q13s32, d24s16, d30s16);
+  q1s32 = vmlsl_s16(q1s32, d25s16, d30s16);
+
+  d4s16 = vqrshrn_n_s32(q2s32, 14);
+  d5s16 = vqrshrn_n_s32(q3s32, 14);
+  d24s16 = vqrshrn_n_s32(q13s32, 14);
+  d25s16 = vqrshrn_n_s32(q1s32, 14);
+  q2s16 = vcombine_s16(d4s16, d5s16);
+  *q12s16 = vcombine_s16(d24s16, d25s16);
+
+  q13s32 = vmull_s16(d10s16, d30s16);
+  q1s32 = vmull_s16(d11s16, d30s16);
+  q11s32 = vmull_s16(d10s16, d30s16);
+  q0s32 = vmull_s16(d11s16, d30s16);
+
+  q13s32 = vmlal_s16(q13s32, d14s16, d30s16);
+  q1s32 = vmlal_s16(q1s32, d15s16, d30s16);
+  q11s32 = vmlsl_s16(q11s32, d14s16, d30s16);
+  q0s32 = vmlsl_s16(q0s32, d15s16, d30s16);
+
+  d20s16 = vqrshrn_n_s32(q13s32, 14);
+  d21s16 = vqrshrn_n_s32(q1s32, 14);
+  d12s16 = vqrshrn_n_s32(q11s32, 14);
+  d13s16 = vqrshrn_n_s32(q0s32, 14);
+  *q10s16 = vcombine_s16(d20s16, d21s16);
+  q6s16 = vcombine_s16(d12s16, d13s16);
+
+  q5s16 = vdupq_n_s16(0);
+
+  *q9s16 = vsubq_s16(q5s16, *q9s16);
+  *q11s16 = vsubq_s16(q5s16, q2s16);
+  *q13s16 = vsubq_s16(q5s16, q6s16);
+  *q15s16 = vsubq_s16(q5s16, q4s16);
+  return;
+}
+
+void vp10_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest,
+                             int dest_stride, int tx_type) {
+  int i;
+  uint8_t *d1, *d2;
+  uint8x8_t d0u8, d1u8, d2u8, d3u8;
+  uint64x1_t d0u64, d1u64, d2u64, d3u64;
+  int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
+  uint16x8_t q8u16, q9u16, q10u16, q11u16;
+
+  q8s16 = vld1q_s16(input);
+  q9s16 = vld1q_s16(input + 8);
+  q10s16 = vld1q_s16(input + 8 * 2);
+  q11s16 = vld1q_s16(input + 8 * 3);
+  q12s16 = vld1q_s16(input + 8 * 4);
+  q13s16 = vld1q_s16(input + 8 * 5);
+  q14s16 = vld1q_s16(input + 8 * 6);
+  q15s16 = vld1q_s16(input + 8 * 7);
+
+  TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+               &q15s16);
+
+  switch (tx_type) {
+    case 0:  // idct_idct is not supported. Fall back to C
+      vp10_iht8x8_64_add_c(input, dest, dest_stride, tx_type);
+      return;
+      break;
+    case 1:  // iadst_idct
+      // generate IDCT constants
+      // GENERATE_IDCT_CONSTANTS
+
+      // first transform rows
+      IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+                 &q15s16);
+
+      // transpose the matrix
+      TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+                   &q15s16);
+
+      // generate IADST constants
+      // GENERATE_IADST_CONSTANTS
+
+      // then transform columns
+      IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+                  &q15s16);
+      break;
+    case 2:  // idct_iadst
+      // generate IADST constants
+      // GENERATE_IADST_CONSTANTS
+
+      // first transform rows
+      IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+                  &q15s16);
+
+      // transpose the matrix
+      TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+                   &q15s16);
+
+      // generate IDCT constants
+      // GENERATE_IDCT_CONSTANTS
+
+      // then transform columns
+      IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+                 &q15s16);
+      break;
+    case 3:  // iadst_iadst
+      // generate IADST constants
+      // GENERATE_IADST_CONSTANTS
+
+      // first transform rows
+      IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+                  &q15s16);
+
+      // transpose the matrix
+      TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+                   &q15s16);
+
+      // then transform columns
+      IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+                  &q15s16);
+      break;
+    default:  // invalid tx_type
+      assert(0);
+      break;
+  }
+
+  q8s16 = vrshrq_n_s16(q8s16, 5);
+  q9s16 = vrshrq_n_s16(q9s16, 5);
+  q10s16 = vrshrq_n_s16(q10s16, 5);
+  q11s16 = vrshrq_n_s16(q11s16, 5);
+  q12s16 = vrshrq_n_s16(q12s16, 5);
+  q13s16 = vrshrq_n_s16(q13s16, 5);
+  q14s16 = vrshrq_n_s16(q14s16, 5);
+  q15s16 = vrshrq_n_s16(q15s16, 5);
+
+  for (d1 = d2 = dest, i = 0; i < 2; i++) {
+    if (i != 0) {
+      q8s16 = q12s16;
+      q9s16 = q13s16;
+      q10s16 = q14s16;
+      q11s16 = q15s16;
+    }
+
+    d0u64 = vld1_u64((uint64_t *)d1);
+    d1 += dest_stride;
+    d1u64 = vld1_u64((uint64_t *)d1);
+    d1 += dest_stride;
+    d2u64 = vld1_u64((uint64_t *)d1);
+    d1 += dest_stride;
+    d3u64 = vld1_u64((uint64_t *)d1);
+    d1 += dest_stride;
+
+    q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));
+    q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
+    q10u16 =
+        vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
+    q11u16 =
+        vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
+
+    d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
+    d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
+    d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
+    d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
+
+    vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
+    d2 += dest_stride;
+    vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
+    d2 += dest_stride;
+    vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
+    d2 += dest_stride;
+    vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
+    d2 += dest_stride;
+  }
+  return;
+}
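
Note for both NEON hybrid-transform files: tx_type cases 0..3 correspond to
DCT_DCT, ADST_DCT, DCT_ADST and ADST_ADST in the VP9-derived TX_TYPE
ordering (an assumption here; CONFIG_EXT_TX extends the set). The cospi_k_64
constants are cos(k*pi/64) in Q14, which is why each 1-D pass narrows its
multiply-accumulate results with vqrshrn_n_s32(., 14). A spot-check sketch:

#include <math.h>
#include <stdio.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

int main(void) {
  int k;
  /* Expect 16305, 16069, ..., 11585 (k = 16), ..., 1606 (k = 30),
   * matching the table at the top of iht8x8_add_neon.c. */
  for (k = 2; k <= 30; k += 2)
    printf("cospi_%d_64 = %.0f\n", k,
           floor(16384.0 * cos(k * M_PI / 64) + 0.5));
  return 0;
}
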
diff --git a/av1/common/blockd.c b/av1/common/blockd.c
new file mode 100644
index 0000000..ee95271
--- /dev/null
+++ b/av1/common/blockd.c
@@ -0,0 +1,165 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "aom_ports/system_state.h"
+
+#include "av1/common/blockd.h"
+
+PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi,
+                                     const MODE_INFO *left_mi, int b) {
+  if (b == 0 || b == 2) {
+    if (!left_mi || is_inter_block(&left_mi->mbmi)) return DC_PRED;
+
+    return get_y_mode(left_mi, b + 1);
+  } else {
+    assert(b == 1 || b == 3);
+    return cur_mi->bmi[b - 1].as_mode;
+  }
+}
+
+PREDICTION_MODE vp10_above_block_mode(const MODE_INFO *cur_mi,
+                                      const MODE_INFO *above_mi, int b) {
+  if (b == 0 || b == 1) {
+    if (!above_mi || is_inter_block(&above_mi->mbmi)) return DC_PRED;
+
+    return get_y_mode(above_mi, b + 2);
+  } else {
+    assert(b == 2 || b == 3);
+    return cur_mi->bmi[b - 2].as_mode;
+  }
+}
+
+void vp10_foreach_transformed_block_in_plane(
+    const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
+    foreach_transformed_block_visitor visit, void *arg) {
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  // block and transform sizes, measured in units of 4x4 blocks
+  // (e.g. an 8x8 block spans 2x2 units, a 16x16 transform 4x4 units);
+  // transform size varies per plane, look it up in a common way.
+  const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
+  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+  const uint8_t num_4x4_tw = num_4x4_blocks_wide_txsize_lookup[tx_size];
+  const uint8_t num_4x4_th = num_4x4_blocks_high_txsize_lookup[tx_size];
+  const int step = num_4x4_tw * num_4x4_th;
+  int i = 0, r, c;
+
+  // If mb_to_right_edge is < 0, the current block extends into the
+  // unrestricted motion vector (UMV) border, and the sub blocks that
+  // lie wholly within the UMV border must not be visited.
+  const int max_blocks_wide =
+      num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >>
+                                                       (5 + pd->subsampling_x));
+  const int max_blocks_high =
+      num_4x4_h + (xd->mb_to_bottom_edge >= 0
+                       ? 0
+                       : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+  const int extra_step = ((num_4x4_w - max_blocks_wide) >>
+                          num_4x4_blocks_wide_txsize_log2_lookup[tx_size]) *
+                         step;
+
+  // Keep track of the row and column of the blocks we use so that we know
+  // if we are in the unrestricted motion border.
+  for (r = 0; r < max_blocks_high; r += num_4x4_th) {
+    // Skip visiting the sub blocks that are wholly within the UMV.
+    for (c = 0; c < max_blocks_wide; c += num_4x4_tw) {
+      visit(plane, i, r, c, plane_bsize, tx_size, arg);
+      i += step;
+    }
+    i += extra_step;
+  }
+}
+
+void vp10_foreach_transformed_block(const MACROBLOCKD *const xd,
+                                    BLOCK_SIZE bsize,
+                                    foreach_transformed_block_visitor visit,
+                                    void *arg) {
+  int plane;
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+    vp10_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
+}
+
+void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
+                       BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
+                       int aoff, int loff) {
+  ENTROPY_CONTEXT *const a = pd->above_context + aoff;
+  ENTROPY_CONTEXT *const l = pd->left_context + loff;
+  const int tx_w_in_blocks = num_4x4_blocks_wide_txsize_lookup[tx_size];
+  const int tx_h_in_blocks = num_4x4_blocks_high_txsize_lookup[tx_size];
+
+  // above
+  if (has_eob && xd->mb_to_right_edge < 0) {
+    int i;
+    const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize] +
+                            (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+    int above_contexts = tx_w_in_blocks;
+    if (above_contexts + aoff > blocks_wide)
+      above_contexts = blocks_wide - aoff;
+
+    for (i = 0; i < above_contexts; ++i) a[i] = has_eob;
+    for (i = above_contexts; i < tx_w_in_blocks; ++i) a[i] = 0;
+  } else {
+    memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_w_in_blocks);
+  }
+
+  // left
+  if (has_eob && xd->mb_to_bottom_edge < 0) {
+    int i;
+    const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] +
+                            (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+    int left_contexts = tx_h_in_blocks;
+    if (left_contexts + loff > blocks_high) left_contexts = blocks_high - loff;
+
+    for (i = 0; i < left_contexts; ++i) l[i] = has_eob;
+    for (i = left_contexts; i < tx_h_in_blocks; ++i) l[i] = 0;
+  } else {
+    memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_h_in_blocks);
+  }
+}
+
+void vp10_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) {
+  int i;
+
+  for (i = 0; i < MAX_MB_PLANE; i++) {
+    xd->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y;
+    xd->plane[i].subsampling_x = i ? ss_x : 0;
+    xd->plane[i].subsampling_y = i ? ss_y : 0;
+  }
+}
+
+#if CONFIG_EXT_INTRA
+const int16_t dr_intra_derivative[90] = {
+  1,    14666, 7330, 4884, 3660, 2926, 2435, 2084, 1821, 1616, 1451, 1317, 1204,
+  1108, 1026,  955,  892,  837,  787,  743,  703,  666,  633,  603,  574,  548,
+  524,  502,   481,  461,  443,  426,  409,  394,  379,  365,  352,  339,  327,
+  316,  305,   294,  284,  274,  265,  256,  247,  238,  230,  222,  214,  207,
+  200,  192,   185,  179,  172,  166,  159,  153,  147,  141,  136,  130,  124,
+  119,  113,   108,  103,  98,   93,   88,   83,   78,   73,   68,   63,   59,
+  54,   49,    45,   40,   35,   31,   26,   22,   17,   13,   8,    4,
+};
+
+// Returns whether filter selection is needed for a given
+// intra prediction angle.
+int vp10_is_intra_filter_switchable(int angle) {
+  assert(angle > 0 && angle < 270);
+  if (angle % 45 == 0) return 0;
+  if (angle > 90 && angle < 180) {
+    return 1;
+  } else {
+    return ((angle < 90 ? dr_intra_derivative[angle]
+                        : dr_intra_derivative[270 - angle]) &
+            0xFF) > 0;
+  }
+}
+#endif  // CONFIG_EXT_INTRA
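
Note: the dr_intra_derivative table above appears to hold
round(256 / tan(angle in degrees)) for angles 1..89, with entry 0 as a
placeholder (this model is inferred from the values, e.g. entry 45 == 256).
That explains the 0xFF mask in vp10_is_intra_filter_switchable(): a zero low
byte means the directional predictor steps land on whole sample positions,
so the choice of interpolation filter cannot matter. A spot-check sketch:

#include <math.h>
#include <stdio.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

int main(void) {
  /* dr_intra_derivative[1], [45] and [87] from the table above. */
  const int table_vals[3] = { 14666, 256, 13 };
  const int angles[3] = { 1, 45, 87 };
  int i;
  for (i = 0; i < 3; ++i)
    printf("angle %2d: table %5d, 256/tan = %7.1f\n", angles[i],
           table_vals[i], 256.0 / tan(angles[i] * M_PI / 180.0));
  return 0;
}
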
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
new file mode 100644
index 0000000..4dcc1f0
--- /dev/null
+++ b/av1/common/blockd.h
@@ -0,0 +1,833 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_BLOCKD_H_
+#define VP10_COMMON_BLOCKD_H_
+
+#include "./vpx_config.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_ports/mem.h"
+#include "aom_scale/yv12config.h"
+
+#include "av1/common/common_data.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/entropy.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/mv.h"
+#include "av1/common/scale.h"
+#include "av1/common/seg_common.h"
+#include "av1/common/tile_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_MB_PLANE 3
+
+typedef enum {
+  KEY_FRAME = 0,
+  INTER_FRAME = 1,
+  FRAME_TYPES,
+} FRAME_TYPE;
+
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+#define IsInterpolatingFilter(filter) (vp10_is_interpolating_filter(filter))
+#else
+#define IsInterpolatingFilter(filter) (1)
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+
+static INLINE int is_inter_mode(PREDICTION_MODE mode) {
+#if CONFIG_EXT_INTER
+  return mode >= NEARESTMV && mode <= NEW_NEWMV;
+#else
+  return mode >= NEARESTMV && mode <= NEWMV;
+#endif  // CONFIG_EXT_INTER
+}
+
+#if CONFIG_EXT_INTER
+static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
+  return mode >= NEARESTMV && mode <= NEWFROMNEARMV;
+}
+
+static INLINE int is_inter_compound_mode(PREDICTION_MODE mode) {
+  return mode >= NEAREST_NEARESTMV && mode <= NEW_NEWMV;
+}
+
+static INLINE PREDICTION_MODE compound_ref0_mode(PREDICTION_MODE mode) {
+  static PREDICTION_MODE lut[MB_MODE_COUNT] = {
+    MB_MODE_COUNT,  // DC_PRED            0
+    MB_MODE_COUNT,  // V_PRED             1
+    MB_MODE_COUNT,  // H_PRED             2
+    MB_MODE_COUNT,  // D45_PRED           3
+    MB_MODE_COUNT,  // D135_PRED          4
+    MB_MODE_COUNT,  // D117_PRED          5
+    MB_MODE_COUNT,  // D153_PRED          6
+    MB_MODE_COUNT,  // D207_PRED          7
+    MB_MODE_COUNT,  // D63_PRED           8
+    MB_MODE_COUNT,  // TM_PRED            9
+    MB_MODE_COUNT,  // NEARESTMV         10
+    MB_MODE_COUNT,  // NEARMV            11
+    MB_MODE_COUNT,  // ZEROMV            12
+    MB_MODE_COUNT,  // NEWMV             13
+    MB_MODE_COUNT,  // NEWFROMNEARMV     14
+    NEARESTMV,      // NEAREST_NEARESTMV 15
+    NEARESTMV,      // NEAREST_NEARMV    16
+    NEARMV,         // NEAR_NEARESTMV    17
+    NEARMV,         // NEAR_NEARMV       18
+    NEARESTMV,      // NEAREST_NEWMV     19
+    NEWMV,          // NEW_NEARESTMV     20
+    NEARMV,         // NEAR_NEWMV        21
+    NEWMV,          // NEW_NEARMV        22
+    ZEROMV,         // ZERO_ZEROMV       23
+    NEWMV,          // NEW_NEWMV         24
+  };
+  assert(is_inter_compound_mode(mode));
+  return lut[mode];
+}
+
+static INLINE PREDICTION_MODE compound_ref1_mode(PREDICTION_MODE mode) {
+  static PREDICTION_MODE lut[MB_MODE_COUNT] = {
+    MB_MODE_COUNT,  // DC_PRED            0
+    MB_MODE_COUNT,  // V_PRED             1
+    MB_MODE_COUNT,  // H_PRED             2
+    MB_MODE_COUNT,  // D45_PRED           3
+    MB_MODE_COUNT,  // D135_PRED          4
+    MB_MODE_COUNT,  // D117_PRED          5
+    MB_MODE_COUNT,  // D153_PRED          6
+    MB_MODE_COUNT,  // D207_PRED          7
+    MB_MODE_COUNT,  // D63_PRED           8
+    MB_MODE_COUNT,  // TM_PRED            9
+    MB_MODE_COUNT,  // NEARESTMV         10
+    MB_MODE_COUNT,  // NEARMV            11
+    MB_MODE_COUNT,  // ZEROMV            12
+    MB_MODE_COUNT,  // NEWMV             13
+    MB_MODE_COUNT,  // NEWFROMNEARMV     14
+    NEARESTMV,      // NEAREST_NEARESTMV 15
+    NEARMV,         // NEAREST_NEARMV    16
+    NEARESTMV,      // NEAR_NEARESTMV    17
+    NEARMV,         // NEAR_NEARMV       18
+    NEWMV,          // NEAREST_NEWMV     19
+    NEARESTMV,      // NEW_NEARESTMV     20
+    NEWMV,          // NEAR_NEWMV        21
+    NEARMV,         // NEW_NEARMV        22
+    ZEROMV,         // ZERO_ZEROMV       23
+    NEWMV,          // NEW_NEWMV         24
+  };
+  assert(is_inter_compound_mode(mode));
+  return lut[mode];
+}
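+
+// E.g. a NEAREST_NEWMV block predicts from reference 0 with NEARESTMV and
+// from reference 1 with NEWMV: compound_ref0_mode(NEAREST_NEWMV) returns
+// NEARESTMV and compound_ref1_mode(NEAREST_NEWMV) returns NEWMV.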
+
+static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) {
+  return (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV ||
+          mode == NEAREST_NEWMV || mode == NEW_NEARESTMV ||
+          mode == NEAR_NEWMV || mode == NEW_NEARMV);
+}
+#else
+
+static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) {
+  return (mode == NEWMV);
+}
+#endif  // CONFIG_EXT_INTER
+
+/* For keyframes, intra block modes are predicted by the (already decoded)
+   modes for the Y blocks to the left and above us; for interframes, there
+   is a single probability table. */
+
+typedef struct {
+  PREDICTION_MODE as_mode;
+  int_mv as_mv[2];  // first, second inter predictor motion vectors
+#if CONFIG_REF_MV
+  int_mv pred_mv_s8[2];
+#endif
+#if CONFIG_EXT_INTER
+  int_mv ref_mv[2];
+#endif  // CONFIG_EXT_INTER
+} b_mode_info;
+
+typedef int8_t MV_REFERENCE_FRAME;
+
+typedef struct {
+  // Number of base colors for Y (0) and UV (1)
+  uint8_t palette_size[2];
+// Value of base colors for Y, U, and V
+#if CONFIG_VP9_HIGHBITDEPTH
+  uint16_t palette_colors[3 * PALETTE_MAX_SIZE];
+#else
+  uint8_t palette_colors[3 * PALETTE_MAX_SIZE];
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  // Only used by encoder to store the color index of the top left pixel.
+  // TODO(huisu): move this to encoder
+  uint8_t palette_first_color_idx[2];
+} PALETTE_MODE_INFO;
+
+#if CONFIG_EXT_INTRA
+typedef struct {
+  // 1: an ext intra mode is used; 0: otherwise.
+  uint8_t use_ext_intra_mode[PLANE_TYPES];
+  EXT_INTRA_MODE ext_intra_mode[PLANE_TYPES];
+} EXT_INTRA_MODE_INFO;
+#endif  // CONFIG_EXT_INTRA
+
+// This structure now relates to 8x8 block regions.
+typedef struct {
+  // Common for both INTER and INTRA blocks
+  BLOCK_SIZE sb_type;
+  PREDICTION_MODE mode;
+  TX_SIZE tx_size;
+#if CONFIG_VAR_TX
+  // TODO(jingning): This effectively assigned a separate entry for each
+  // 8x8 block. Apparently it takes much more space than needed.
+  TX_SIZE inter_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
+#endif
+  int8_t skip;
+  int8_t has_no_coeffs;
+  int8_t segment_id;
+#if CONFIG_SUPERTX
+  // Minimum of all segment IDs under the current supertx block.
+  int8_t segment_id_supertx;
+#endif                      // CONFIG_SUPERTX
+  int8_t seg_id_predicted;  // valid only when temporal_update is enabled
+
+  // Only for INTRA blocks
+  PREDICTION_MODE uv_mode;
+  PALETTE_MODE_INFO palette_mode_info;
+
+// Only for INTER blocks
+#if CONFIG_DUAL_FILTER
+  INTERP_FILTER interp_filter[4];
+#else
+  INTERP_FILTER interp_filter;
+#endif
+  MV_REFERENCE_FRAME ref_frame[2];
+  TX_TYPE tx_type;
+
+#if CONFIG_EXT_INTRA
+  EXT_INTRA_MODE_INFO ext_intra_mode_info;
+  int8_t angle_delta[2];
+  // TODO(huisu): this may be replaced by interp_filter
+  INTRA_FILTER intra_filter;
+#endif  // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTER
+  INTERINTRA_MODE interintra_mode;
+  // TODO(debargha): Consolidate these flags
+  int use_wedge_interintra;
+  int interintra_wedge_index;
+  int interintra_wedge_sign;
+  int use_wedge_interinter;
+  int interinter_wedge_index;
+  int interinter_wedge_sign;
+#endif  // CONFIG_EXT_INTER
+  MOTION_VARIATION motion_variation;
+  int_mv mv[2];
+  int_mv pred_mv[2];
+#if CONFIG_REF_MV
+  uint8_t ref_mv_idx;
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+  PARTITION_TYPE partition;
+#endif
+#if CONFIG_NEW_QUANT
+  int dq_off_index;
+  int send_dq_bit;
+#endif  // CONFIG_NEW_QUANT
+  /* deringing gain *per-superblock* */
+  int8_t dering_gain;
+} MB_MODE_INFO;
+
+typedef struct MODE_INFO {
+  MB_MODE_INFO mbmi;
+  b_mode_info bmi[4];
+} MODE_INFO;
+
+static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) {
+  return mi->mbmi.sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode : mi->mbmi.mode;
+}
+
+static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
+  return mbmi->ref_frame[0] > INTRA_FRAME;
+}
+
+static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) {
+  return mbmi->ref_frame[1] > INTRA_FRAME;
+}
+
+PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi,
+                                     const MODE_INFO *left_mi, int b);
+
+PREDICTION_MODE vp10_above_block_mode(const MODE_INFO *cur_mi,
+                                      const MODE_INFO *above_mi, int b);
+
+enum mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 };
+
+struct buf_2d {
+  uint8_t *buf;
+  uint8_t *buf0;
+  int width;
+  int height;
+  int stride;
+};
+
+typedef struct macroblockd_plane {
+  tran_low_t *dqcoeff;
+  PLANE_TYPE plane_type;
+  int subsampling_x;
+  int subsampling_y;
+  struct buf_2d dst;
+  struct buf_2d pre[2];
+  ENTROPY_CONTEXT *above_context;
+  ENTROPY_CONTEXT *left_context;
+  int16_t seg_dequant[MAX_SEGMENTS][2];
+#if CONFIG_NEW_QUANT
+  dequant_val_type_nuq
+      seg_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
+#endif
+  uint8_t *color_index_map;
+
+  // number of 4x4s in current block
+  uint16_t n4_w, n4_h;
+  // log2 of n4_w, n4_h
+  uint8_t n4_wl, n4_hl;
+
+#if CONFIG_AOM_QM
+  const qm_val_t *seg_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
+#endif
+  // encoder
+  const int16_t *dequant;
+#if CONFIG_NEW_QUANT
+  const dequant_val_type_nuq *dequant_val_nuq[QUANT_PROFILES];
+#endif  // CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+  const qm_val_t *seg_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
+#endif
+} MACROBLOCKD_PLANE;
+
+#define BLOCK_OFFSET(x, i) ((x) + (i)*16)
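+// Each 4x4 transform block owns 16 coefficients, so e.g.
+// BLOCK_OFFSET(pd->dqcoeff, 3) addresses the coefficients of the fourth 4x4
+// block in the plane's coefficient buffer.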
+
+typedef struct RefBuffer {
+  // TODO(dkovalev): idx is not really required and should be removed, now it
+  // is used in vp10_onyxd_if.c
+  int idx;
+  YV12_BUFFER_CONFIG *buf;
+  struct scale_factors sf;
+} RefBuffer;
+
+typedef struct macroblockd {
+  struct macroblockd_plane plane[MAX_MB_PLANE];
+  uint8_t bmode_blocks_wl;
+  uint8_t bmode_blocks_hl;
+
+  FRAME_COUNTS *counts;
+  TileInfo tile;
+
+  int mi_stride;
+
+  MODE_INFO **mi;
+  MODE_INFO *left_mi;
+  MODE_INFO *above_mi;
+  MB_MODE_INFO *left_mbmi;
+  MB_MODE_INFO *above_mbmi;
+
+  int up_available;
+  int left_available;
+
+  const vpx_prob (*partition_probs)[PARTITION_TYPES - 1];
+
+  /* Distance of MB away from frame edges */
+  int mb_to_left_edge;
+  int mb_to_right_edge;
+  int mb_to_top_edge;
+  int mb_to_bottom_edge;
+
+  FRAME_CONTEXT *fc;
+
+  /* pointers to reference frames */
+  RefBuffer *block_refs[2];
+
+  /* pointer to current frame */
+  const YV12_BUFFER_CONFIG *cur_buf;
+
+  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
+  ENTROPY_CONTEXT left_context[MAX_MB_PLANE][2 * MAX_MIB_SIZE];
+
+  PARTITION_CONTEXT *above_seg_context;
+  PARTITION_CONTEXT left_seg_context[MAX_MIB_SIZE];
+
+#if CONFIG_VAR_TX
+  TXFM_CONTEXT *above_txfm_context;
+  TXFM_CONTEXT *left_txfm_context;
+  TXFM_CONTEXT left_txfm_context_buffer[MAX_MIB_SIZE];
+
+  TX_SIZE max_tx_size;
+#if CONFIG_SUPERTX
+  TX_SIZE supertx_size;
+#endif
+#endif
+
+  // Dimensions of the current block in units of 8x8 blocks.
+  uint8_t n8_w, n8_h;
+
+#if CONFIG_REF_MV
+  uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
+  CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
+  uint8_t is_sec_rect;
+#endif
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  /* Bit depth: 8, 10, 12 */
+  int bd;
+#endif
+
+  int lossless[MAX_SEGMENTS];
+  int corrupted;
+
+  struct vpx_internal_error_info *error_info;
+#if CONFIG_GLOBAL_MOTION
+  Global_Motion_Params *global_motion;
+#endif  // CONFIG_GLOBAL_MOTION
+} MACROBLOCKD;
+
+static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize,
+                                     PARTITION_TYPE partition) {
+  if (partition == PARTITION_INVALID)
+    return BLOCK_INVALID;
+  else
+    return subsize_lookup[partition][bsize];
+}
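+
+// E.g. get_subsize(BLOCK_64X64, PARTITION_HORZ) yields BLOCK_64X32 and
+// get_subsize(BLOCK_16X16, PARTITION_SPLIT) yields BLOCK_8X8, per
+// subsize_lookup in common_data.h.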
+
+static const TX_TYPE intra_mode_to_tx_type_context[INTRA_MODES] = {
+  DCT_DCT,    // DC
+  ADST_DCT,   // V
+  DCT_ADST,   // H
+  DCT_DCT,    // D45
+  ADST_ADST,  // D135
+  ADST_DCT,   // D117
+  DCT_ADST,   // D153
+  DCT_ADST,   // D207
+  ADST_DCT,   // D63
+  ADST_ADST,  // TM
+};
+
+#if CONFIG_SUPERTX
+static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) {
+  return (int)txsize_sqr_map[mbmi->tx_size] >
+         VPXMIN(b_width_log2_lookup[mbmi->sb_type],
+                b_height_log2_lookup[mbmi->sb_type]);
+}
+#endif  // CONFIG_SUPERTX
+
+static INLINE int get_tx1d_width(TX_SIZE tx_size) {
+  return num_4x4_blocks_wide_txsize_lookup[tx_size] << 2;
+}
+
+static INLINE int get_tx1d_height(TX_SIZE tx_size) {
+  return num_4x4_blocks_high_txsize_lookup[tx_size] << 2;
+}
+
+static INLINE int get_tx2d_size(TX_SIZE tx_size) {
+  return num_4x4_blocks_txsize_lookup[tx_size] << 4;
+}
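+
+// E.g. with CONFIG_EXT_TX, TX_8X16 gives get_tx1d_width() == 8,
+// get_tx1d_height() == 16 and get_tx2d_size() == 128 (8 * 16 coefficients).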
+
+#if CONFIG_EXT_TX
+#define ALLOW_INTRA_EXT_TX 1
+// whether masked transforms are used for 32X32
+#define USE_MSKTX_FOR_32X32 0
+#define USE_REDUCED_TXSET_FOR_16X16 1
+
+static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = { 1, 16, 12, 2 };
+static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = { 1, 7, 5 };
+
+#if EXT_TX_SIZES == 4
+static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, int is_inter) {
+  tx_size = txsize_sqr_map[tx_size];
+  if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0;
+#if USE_REDUCED_TXSET_FOR_16X16
+  if (tx_size == TX_32X32) return is_inter ? 3 - USE_MSKTX_FOR_32X32 : 0;
+  return (tx_size == TX_16X16 ? 2 : 1);
+#else
+  if (tx_size == TX_32X32) return is_inter ? 3 - 2 * USE_MSKTX_FOR_32X32 : 0;
+  return (tx_size == TX_16X16 && !is_inter ? 2 : 1);
+#endif  // USE_REDUCED_TXSET_FOR_16X16
+}
+
+static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][TX_SIZES] = {
+  { 0, 0, 0, 0 },  // unused
+  { 1, 1, 0, 0 },
+  { 0, 0, 1, 0 },
+};
+
+static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][TX_SIZES] = {
+  { 0, 0, 0, 0 },  // unused
+  { 1, 1, (!USE_REDUCED_TXSET_FOR_16X16), USE_MSKTX_FOR_32X32 },
+  { 0, 0, USE_REDUCED_TXSET_FOR_16X16, 0 },
+  { 0, 0, 0, (!USE_MSKTX_FOR_32X32) },
+};
+
+#else  // EXT_TX_SIZES == 4
+
+static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, int is_inter) {
+  (void)is_inter;
+  tx_size = txsize_sqr_map[tx_size];
+  if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0;
+  if (tx_size == TX_32X32) return 0;
+#if USE_REDUCED_TXSET_FOR_16X16
+  return (tx_size == TX_16X16 ? 2 : 1);
+#else
+  return (tx_size == TX_16X16 && !is_inter ? 2 : 1);
+#endif  // USE_REDUCED_TXSET_FOR_16X16
+}
+
+static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][TX_SIZES] = {
+  { 0, 0, 0, 0 },  // unused
+  { 1, 1, 0, 0 },
+  { 0, 0, 1, 0 },
+};
+
+static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][TX_SIZES] = {
+  { 0, 0, 0, 0 },  // unused
+  { 1, 1, (!USE_REDUCED_TXSET_FOR_16X16), 0 },
+  { 0, 0, USE_REDUCED_TXSET_FOR_16X16, 0 },
+  { 0, 0, 0, 1 },
+};
+#endif  // EXT_TX_SIZES == 4
+
+// Transform types used in each intra set
+static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = {
+  { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+  { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0 },
+  { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
+};
+
+// Transform types used in each inter set
+static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
+  { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
+  { 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
+};
+
+// 1D transforms used in the inter sets; must be kept in sync with
+// ext_tx_used_inter above.
+static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
+  { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 1 },
+};
+
+static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs,
+                                   int is_inter) {
+  const int set = get_ext_tx_set(tx_size, bs, is_inter);
+  return is_inter ? num_ext_tx_set_inter[set] : num_ext_tx_set_intra[set];
+}
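+
+// E.g. an inter TX_4X4 on a BLOCK_8X8 falls in set 1 and may use all 16
+// transform types; an intra block in set 1 is limited to 7 of them.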
+
+#if CONFIG_RECT_TX
+static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
+  static const char LUT[BLOCK_SIZES] = {
+    0,  // BLOCK_4X4
+    1,  // BLOCK_4X8
+    1,  // BLOCK_8X4
+    0,  // BLOCK_8X8
+    1,  // BLOCK_8X16
+    1,  // BLOCK_16X8
+    0,  // BLOCK_16X16
+    1,  // BLOCK_16X32
+    1,  // BLOCK_32X16
+    0,  // BLOCK_32X32
+    0,  // BLOCK_32X64
+    0,  // BLOCK_64X32
+    0,  // BLOCK_64X64
+#if CONFIG_EXT_PARTITION
+    0,  // BLOCK_64X128
+    0,  // BLOCK_128X64
+    0,  // BLOCK_128X128
+#endif  // CONFIG_EXT_PARTITION
+  };
+
+  return LUT[bsize];
+}
+
+static INLINE int is_rect_tx_allowed(const MB_MODE_INFO *mbmi) {
+  return is_inter_block(mbmi) && is_rect_tx_allowed_bsize(mbmi->sb_type);
+}
+
+static INLINE int is_rect_tx(TX_SIZE tx_size) { return tx_size >= TX_SIZES; }
+#endif  // CONFIG_RECT_TX
+#endif  // CONFIG_EXT_TX
+
+static INLINE TX_SIZE tx_size_from_tx_mode(BLOCK_SIZE bsize, TX_MODE tx_mode,
+                                           int is_inter) {
+  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
+  const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+  if (!is_inter) {
+    return VPXMIN(max_tx_size, largest_tx_size);
+  } else {
+    const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bsize];
+    if (txsize_sqr_up_map[max_rect_tx_size] <= largest_tx_size) {
+      return max_rect_tx_size;
+    } else {
+      return largest_tx_size;
+    }
+  }
+#else
+  (void)is_inter;
+  return VPXMIN(max_tx_size, largest_tx_size);
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
+}
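+
+// E.g. a BLOCK_16X16 under ALLOW_32X32 clamps to TX_16X16. With
+// CONFIG_EXT_TX && CONFIG_RECT_TX, an inter BLOCK_8X16 may instead use the
+// rectangular TX_8X16, provided its square superset TX_16X16 fits within
+// the tx_mode's largest allowed size.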
+
+#if CONFIG_EXT_INTRA
+#define ALLOW_FILTER_INTRA_MODES 1
+#define ANGLE_STEP 3
+#define MAX_ANGLE_DELTAS 3
+
+extern const int16_t dr_intra_derivative[90];
+
+static const uint8_t mode_to_angle_map[INTRA_MODES] = {
+  0, 90, 180, 45, 135, 111, 157, 203, 67, 0,
+};
+
+static const TX_TYPE filter_intra_mode_to_tx_type_lookup[FILTER_INTRA_MODES] = {
+  DCT_DCT,    // FILTER_DC
+  ADST_DCT,   // FILTER_V
+  DCT_ADST,   // FILTER_H
+  DCT_DCT,    // FILTER_D45
+  ADST_ADST,  // FILTER_D135
+  ADST_DCT,   // FILTER_D117
+  DCT_ADST,   // FILTER_D153
+  DCT_ADST,   // FILTER_D207
+  ADST_DCT,   // FILTER_D63
+  ADST_ADST,  // FILTER_TM
+};
+
+int vp10_is_intra_filter_switchable(int angle);
+#endif  // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_TILE
+#define FIXED_TX_TYPE 1
+#else
+#define FIXED_TX_TYPE 0
+#endif
+
+static INLINE TX_TYPE get_default_tx_type(PLANE_TYPE plane_type,
+                                          const MACROBLOCKD *xd, int block_idx,
+                                          TX_SIZE tx_size) {
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+
+  if (is_inter_block(mbmi) || plane_type != PLANE_TYPE_Y ||
+      xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32)
+    return DCT_DCT;
+
+  return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y
+                                           ? get_y_mode(xd->mi[0], block_idx)
+                                           : mbmi->uv_mode];
+}
+
+static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd,
+                                  int block_idx, TX_SIZE tx_size) {
+  const MODE_INFO *const mi = xd->mi[0];
+  const MB_MODE_INFO *const mbmi = &mi->mbmi;
+
+  if (FIXED_TX_TYPE)
+    return get_default_tx_type(plane_type, xd, block_idx, tx_size);
+
+#if CONFIG_EXT_INTRA
+  if (!is_inter_block(mbmi)) {
+    const int use_ext_intra_mode_info =
+        mbmi->ext_intra_mode_info.use_ext_intra_mode[plane_type];
+    const EXT_INTRA_MODE ext_intra_mode =
+        mbmi->ext_intra_mode_info.ext_intra_mode[plane_type];
+    const PREDICTION_MODE mode = (plane_type == PLANE_TYPE_Y)
+                                     ? get_y_mode(mi, block_idx)
+                                     : mbmi->uv_mode;
+
+    if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32) return DCT_DCT;
+
+#if CONFIG_EXT_TX
+#if ALLOW_INTRA_EXT_TX
+    if (mbmi->sb_type >= BLOCK_8X8 && plane_type == PLANE_TYPE_Y)
+      return mbmi->tx_type;
+#endif  // ALLOW_INTRA_EXT_TX
+#endif  // CONFIG_EXT_TX
+
+    if (use_ext_intra_mode_info)
+      return filter_intra_mode_to_tx_type_lookup[ext_intra_mode];
+
+    if (mode == DC_PRED) {
+      return DCT_DCT;
+    } else if (mode == TM_PRED) {
+      return ADST_ADST;
+    } else {
+      int angle = mode_to_angle_map[mode];
+      if (mbmi->sb_type >= BLOCK_8X8)
+        angle += mbmi->angle_delta[plane_type] * ANGLE_STEP;
+      assert(angle > 0 && angle < 270);
+      if (angle == 135)
+        return ADST_ADST;
+      else if (angle < 45 || angle > 225)
+        return DCT_DCT;
+      else if (angle < 135)
+        return ADST_DCT;
+      else
+        return DCT_ADST;
+    }
+  }
+#endif  // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_TX
+#if EXT_TX_SIZES == 4
+  if (xd->lossless[mbmi->segment_id] || txsize_sqr_map[tx_size] > TX_32X32 ||
+      (txsize_sqr_map[tx_size] >= TX_32X32 && !is_inter_block(mbmi)))
+#else
+  if (xd->lossless[mbmi->segment_id] || txsize_sqr_map[tx_size] >= TX_32X32)
+#endif
+    return DCT_DCT;
+  if (mbmi->sb_type >= BLOCK_8X8) {
+    if (plane_type == PLANE_TYPE_Y) {
+#if !ALLOW_INTRA_EXT_TX
+      if (is_inter_block(mbmi))
+#endif  // ALLOW_INTRA_EXT_TX
+        return mbmi->tx_type;
+    }
+    if (is_inter_block(mbmi))
+      // UV Inter only
+      return (mbmi->tx_type == IDTX && txsize_sqr_map[tx_size] == TX_32X32)
+                 ? DCT_DCT
+                 : mbmi->tx_type;
+  }
+
+  // Sub8x8-Inter/Intra OR UV-Intra
+  if (is_inter_block(mbmi))  // Sub8x8-Inter
+    return DCT_DCT;
+  else  // Sub8x8 Intra OR UV-Intra
+    return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y
+                                             ? get_y_mode(mi, block_idx)
+                                             : mbmi->uv_mode];
+#else   // CONFIG_EXT_TX
+  (void)block_idx;
+  if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] ||
+      txsize_sqr_map[tx_size] >= TX_32X32)
+    return DCT_DCT;
+  return mbmi->tx_type;
+#endif  // CONFIG_EXT_TX
+}
+
+void vp10_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);
+
+static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize,
+                                          int xss, int yss) {
+  if (bsize < BLOCK_8X8) {
+    return TX_4X4;
+  } else {
+    const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss];
+    return VPXMIN(txsize_sqr_map[y_tx_size], max_txsize_lookup[plane_bsize]);
+  }
+}
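+
+// E.g. a BLOCK_16X16 coded with a TX_16X16 luma transform in 4:2:0
+// (xss == yss == 1) maps to plane_bsize BLOCK_8X8, so chroma is clamped
+// down to TX_8X8.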
+
+static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi,
+                                     const struct macroblockd_plane *pd) {
+#if CONFIG_SUPERTX
+  if (supertx_enabled(mbmi))
+    return uvsupertx_size_lookup[txsize_sqr_map[mbmi->tx_size]]
+                                [pd->subsampling_x][pd->subsampling_y];
+#endif  // CONFIG_SUPERTX
+  return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x,
+                             pd->subsampling_y);
+}
+
+static INLINE BLOCK_SIZE
+get_plane_block_size(BLOCK_SIZE bsize, const struct macroblockd_plane *pd) {
+  return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
+}
+
+static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
+  int i;
+  for (i = 0; i < MAX_MB_PLANE; i++) {
+    struct macroblockd_plane *const pd = &xd->plane[i];
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+    memset(pd->above_context, 0,
+           sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]);
+    memset(pd->left_context, 0,
+           sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]);
+  }
+}
+
+typedef void (*foreach_transformed_block_visitor)(int plane, int block,
+                                                  int blk_row, int blk_col,
+                                                  BLOCK_SIZE plane_bsize,
+                                                  TX_SIZE tx_size, void *arg);
+
+void vp10_foreach_transformed_block_in_plane(
+    const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
+    foreach_transformed_block_visitor visit, void *arg);
+
+void vp10_foreach_transformed_block(const MACROBLOCKD *const xd,
+                                    BLOCK_SIZE bsize,
+                                    foreach_transformed_block_visitor visit,
+                                    void *arg);
+
+void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
+                       BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
+                       int aoff, int loff);
+
+#if CONFIG_EXT_INTER
+static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) {
+  // TODO(debargha): Should this be bsize < BLOCK_LARGEST?
+  return (bsize >= BLOCK_8X8) && (bsize < BLOCK_64X64);
+}
+
+static INLINE int is_interintra_allowed_mode(const PREDICTION_MODE mode) {
+  return (mode >= NEARESTMV) && (mode <= NEWMV);
+}
+
+static INLINE int is_interintra_allowed_ref(const MV_REFERENCE_FRAME rf[2]) {
+  return (rf[0] > INTRA_FRAME) && (rf[1] <= INTRA_FRAME);
+}
+
+static INLINE int is_interintra_allowed(const MB_MODE_INFO *mbmi) {
+  return is_interintra_allowed_bsize(mbmi->sb_type) &&
+         is_interintra_allowed_mode(mbmi->mode) &&
+         is_interintra_allowed_ref(mbmi->ref_frame);
+}
+
+static INLINE int is_interintra_allowed_bsize_group(const int group) {
+  int i;
+  for (i = 0; i < BLOCK_SIZES; i++) {
+    if (size_group_lookup[i] == group && is_interintra_allowed_bsize(i))
+      return 1;
+  }
+  return 0;
+}
+
+static INLINE int is_interintra_pred(const MB_MODE_INFO *mbmi) {
+  return (mbmi->ref_frame[1] == INTRA_FRAME) && is_interintra_allowed(mbmi);
+}
+#endif  // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+static INLINE int is_motvar_allowed(const MB_MODE_INFO *mbmi) {
+#if CONFIG_EXT_INTER
+  return (mbmi->sb_type >= BLOCK_8X8 && mbmi->ref_frame[1] != INTRA_FRAME);
+#else
+  return (mbmi->sb_type >= BLOCK_8X8);
+#endif  // CONFIG_EXT_INTER
+}
+
+#if CONFIG_OBMC
+static INLINE int is_neighbor_overlappable(const MB_MODE_INFO *mbmi) {
+  return (is_inter_block(mbmi));
+}
+#endif  // CONFIG_OBMC
+#endif  // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_BLOCKD_H_
diff --git a/av1/common/clpf.c b/av1/common/clpf.c
new file mode 100644
index 0000000..bba40cb
--- /dev/null
+++ b/av1/common/clpf.c
@@ -0,0 +1,99 @@
+/*
+Copyright (c) 2016 Cisco Systems
+(Replace with proper AOM header)
+*/
+
+#include "av1/common/clpf.h"
+
+// Apply the filter on a single block
+static void clpf_block(const uint8_t *src, uint8_t *dst, int sstride,
+                       int dstride, int has_top, int has_left, int has_bottom,
+                       int has_right, int width, int height) {
+  int x, y;
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++) {
+      int X = src[(y + 0) * sstride + x + 0];
+      int A = has_top ? src[(y - 1) * sstride + x + 0] : X;
+      int B = has_left ? src[(y + 0) * sstride + x - 1] : X;
+      int C = has_right ? src[(y + 0) * sstride + x + 1] : X;
+      int D = has_bottom ? src[(y + 1) * sstride + x + 0] : X;
+      int delta = ((A > X) + (B > X) + (C > X) + (D > X) > 2) -
+                  ((A < X) + (B < X) + (C < X) + (D < X) > 2);
+      dst[y * dstride + x] = X + delta;
+    }
+  }
+}
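+
+// Worked example: for X = 100 with neighbors A, B, C, D = 103, 101, 104, 99,
+// three of the four neighbors exceed X while only one is below it, so
+// delta = 1 - 0 and the output pixel becomes 101; isolated ringing pixels
+// are nudged toward their neighborhood by at most one step.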
+
+#define BS (MI_SIZE * MAX_MIB_SIZE)
+
+// Iterate over blocks within a superblock
+static void vp10_clpf_sb(const YV12_BUFFER_CONFIG *frame_buffer,
+                         const VP10_COMMON *cm, MACROBLOCKD *xd,
+                         MODE_INFO *const *mi_8x8, int xpos, int ypos) {
+  // Temporary buffer (to allow SIMD parallelism)
+  uint8_t buf_unaligned[BS * BS + 15];
+  uint8_t *buf = (uint8_t *)(((intptr_t)buf_unaligned + 15) & ~15);
+  int x, y, p;
+
+  for (p = 0; p < (CLPF_FILTER_ALL_PLANES ? MAX_MB_PLANE : 1); p++) {
+    for (y = 0; y < MAX_MIB_SIZE && ypos + y < cm->mi_rows; y++) {
+      for (x = 0; x < MAX_MIB_SIZE && xpos + x < cm->mi_cols; x++) {
+        const MB_MODE_INFO *mbmi =
+            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
+
+        // Do not filter if there is no residual
+        if (!mbmi->skip) {
+          // Do not filter frame edges
+          int has_top = ypos + y > 0;
+          int has_left = xpos + x > 0;
+          int has_bottom = ypos + y < cm->mi_rows - 1;
+          int has_right = xpos + x < cm->mi_cols - 1;
+#if CLPF_ALLOW_BLOCK_PARALLELISM
+          // Do not filter superblock edges
+          has_top &= !!y;
+          has_left &= !!x;
+          has_bottom &= y != MAX_MIB_SIZE - 1;
+          has_right &= x != MAX_MIB_SIZE - 1;
+#endif
+          vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
+          clpf_block(
+              xd->plane[p].dst.buf, CLPF_ALLOW_PIXEL_PARALLELISM
+                                        ? buf + y * MI_SIZE * BS + x * MI_SIZE
+                                        : xd->plane[p].dst.buf,
+              xd->plane[p].dst.stride,
+              CLPF_ALLOW_PIXEL_PARALLELISM ? BS : xd->plane[p].dst.stride,
+              has_top, has_left, has_bottom, has_right,
+              MI_SIZE >> xd->plane[p].subsampling_x,
+              MI_SIZE >> xd->plane[p].subsampling_y);
+        }
+      }
+    }
+#if CLPF_ALLOW_PIXEL_PARALLELISM
+    for (y = 0; y < MAX_MIB_SIZE && ypos + y < cm->mi_rows; y++) {
+      for (x = 0; x < MAX_MIB_SIZE && xpos + x < cm->mi_cols; x++) {
+        const MB_MODE_INFO *mbmi =
+            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
+        vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
+        if (!mbmi->skip) {
+          int i;
+          for (i = 0; i < (MI_SIZE >> xd->plane[p].subsampling_y); i++)
+            memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
+                   buf + (y * MI_SIZE + i) * BS + x * MI_SIZE,
+                   MI_SIZE >> xd->plane[p].subsampling_x);
+        }
+      }
+    }
+#endif
+  }
+}
+
+// Iterate over the superblocks of an entire frame
+void vp10_clpf_frame(const YV12_BUFFER_CONFIG *frame, const VP10_COMMON *cm,
+                     MACROBLOCKD *xd) {
+  int x, y;
+
+  for (y = 0; y < cm->mi_rows; y += MAX_MIB_SIZE)
+    for (x = 0; x < cm->mi_cols; x += MAX_MIB_SIZE)
+      vp10_clpf_sb(frame, cm, xd, cm->mi_grid_visible, x, y);
+}
diff --git a/av1/common/clpf.h b/av1/common/clpf.h
new file mode 100644
index 0000000..5b9d55b
--- /dev/null
+++ b/av1/common/clpf.h
@@ -0,0 +1,22 @@
+/*
+Copyright (c) 2016, Cisco Systems
+(Replace with proper AOM header)
+*/
+
+#ifndef VP10_COMMON_CLPF_H_
+#define VP10_COMMON_CLPF_H_
+
+#include "av1/common/reconinter.h"
+
+// Configuration
+#define CLPF_ALLOW_PIXEL_PARALLELISM \
+  1  // 1 = SIMD friendly (adds a buffer requirement)
+#define CLPF_ALLOW_BLOCK_PARALLELISM \
+  0  // 1 = MT friendly (degrades quality slightly)
+#define CLPF_FILTER_ALL_PLANES \
+  0  // 1 = filter both luma and chroma, 0 = filter only luma
+
+void vp10_clpf_frame(const YV12_BUFFER_CONFIG *frame, const VP10_COMMON *cm,
+                     MACROBLOCKD *xd);
+
+#endif  // VP10_COMMON_CLPF_H_
diff --git a/av1/common/common.h b/av1/common/common.h
new file mode 100644
index 0000000..4e30034
--- /dev/null
+++ b/av1/common/common.h
@@ -0,0 +1,80 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_COMMON_H_
+#define VP10_COMMON_COMMON_H_
+
+/* Interface header for common constant data structures and lookup tables */
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom/vpx_integer.h"
+#include "aom_ports/bitops.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PI 3.141592653589793238462643383279502884
+
+// Only needed for fixed-size arrays; for structs, just assign.
+#define vp10_copy(dest, src)             \
+  {                                      \
+    assert(sizeof(dest) == sizeof(src)); \
+    memcpy(dest, src, sizeof(src));      \
+  }
+
+// Use this for variably-sized arrays.
+#define vp10_copy_array(dest, src, n)          \
+  {                                            \
+    assert(sizeof(*(dest)) == sizeof(*(src))); \
+    memcpy(dest, src, n * sizeof(*(src)));     \
+  }
+
+#define vp10_zero(dest) memset(&(dest), 0, sizeof(dest))
+#define vp10_zero_array(dest, n) memset(dest, 0, n * sizeof(*(dest)))
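+
+// Usage sketch (variable names illustrative): vp10_copy(dst_tbl, src_tbl)
+// copies between two equally sized arrays, vp10_copy_array(dst, src, n)
+// handles a count known only at run time, and vp10_zero(counts) clears a
+// struct or fixed-size array in place.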
+
+static INLINE int get_unsigned_bits(unsigned int num_values) {
+  return num_values > 0 ? get_msb(num_values) + 1 : 0;
+}
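+
+// E.g. get_unsigned_bits(255) == 8 and get_unsigned_bits(256) == 9: the
+// number of bits needed to code a maximum value of num_values.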
+
+#if CONFIG_DEBUG
+#define CHECK_MEM_ERROR(cm, lval, expr)                                     \
+  do {                                                                      \
+    lval = (expr);                                                          \
+    if (!lval)                                                              \
+      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,                   \
+                         "Failed to allocate " #lval " at %s:%d", __FILE__, \
+                         __LINE__);                                         \
+  } while (0)
+#else
+#define CHECK_MEM_ERROR(cm, lval, expr)                   \
+  do {                                                    \
+    lval = (expr);                                        \
+    if (!lval)                                            \
+      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \
+                         "Failed to allocate " #lval);    \
+  } while (0)
+#endif
+// TODO(yaowu): validate the usage of these codes or develop new ones.
+#define VP10_SYNC_CODE_0 0x49
+#define VP10_SYNC_CODE_1 0x83
+#define VP10_SYNC_CODE_2 0x43
+
+#define VPX_FRAME_MARKER 0x2
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_COMMON_H_
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
new file mode 100644
index 0000000..4348f08
--- /dev/null
+++ b/av1/common/common_data.h
@@ -0,0 +1,586 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_COMMON_DATA_H_
+#define VP10_COMMON_COMMON_DATA_H_
+
+#include "av1/common/enums.h"
+#include "aom/vpx_integer.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if CONFIG_EXT_PARTITION
+#define IF_EXT_PARTITION(...) __VA_ARGS__
+#else
+#define IF_EXT_PARTITION(...)
+#endif
+
+// Log 2 conversion lookup tables for block width and height
+static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = {
+  0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5)
+};
+static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = {
+  0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5)
+};
+// Log 2 conversion lookup tables for modeinfo width and height
+static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = {
+  0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, IF_EXT_PARTITION(3, 4, 4)
+};
+static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] = {
+  0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4)
+};
+
+// Width/height lookup tables in units of various block sizes
+static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = {
+  1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32)
+};
+static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = {
+  1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32)
+};
+static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = {
+  1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16)
+};
+static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = {
+  1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16)
+};
+static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES] = {
+  1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, IF_EXT_PARTITION(4, 8, 8)
+};
+static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] = {
+  1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)
+};
+
+static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = {
+  1, 4, 16, 64,
+#if CONFIG_EXT_TX
+  2, 2, 8,  8,  32, 32
+#endif  // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = {
+  1, 2, 4, 8,
+#if CONFIG_EXT_TX
+  1, 2, 2, 4, 4, 8
+#endif  // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = {
+  1, 2, 4, 8,
+#if CONFIG_EXT_TX
+  2, 1, 4, 2, 8, 4
+#endif  // CONFIG_EXT_TX
+};
+
+static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = {
+  0, 2, 4, 6,
+#if CONFIG_EXT_TX
+  1, 1, 3, 3, 5, 5
+#endif  // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_wide_txsize_log2_lookup[TX_SIZES_ALL] = {
+  0, 1, 2, 3,
+#if CONFIG_EXT_TX
+  0, 1, 1, 2, 2, 3
+#endif  // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_high_txsize_log2_lookup[TX_SIZES_ALL] = {
+  0, 1, 2, 3,
+#if CONFIG_EXT_TX
+  1, 0, 2, 1, 3, 2
+#endif  // CONFIG_EXT_TX
+};
+
+// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize)))
+static const uint8_t size_group_lookup[BLOCK_SIZES] = {
+  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3)
+};
+
+static const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = {
+  4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, IF_EXT_PARTITION(13, 13, 14)
+};
+
+/* clang-format off */
+static const PARTITION_TYPE
+  partition_lookup[MAX_SB_SIZE_LOG2 - 1][BLOCK_SIZES] = {
+  {     // 4X4 ->
+    //                                    4X4
+                                          PARTITION_NONE,
+    // 4X8,            8X4,               8X8
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+    // 8X16,           16X8,              16X16
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+    // 16X32,          32X16,             32X32
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+    // 32X64,          64X32,             64X64
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#if CONFIG_EXT_PARTITION
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#endif  // CONFIG_EXT_PARTITION
+  }, {  // 8X8 ->
+    //                                    4X4
+                                          PARTITION_SPLIT,
+    // 4X8,            8X4,               8X8
+    PARTITION_VERT,    PARTITION_HORZ,    PARTITION_NONE,
+    // 8X16,           16X8,              16X16
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+    // 16X32,          32X16,             32X32
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+    // 32X64,          64X32,             64X64
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#if CONFIG_EXT_PARTITION
+    // 64x128,         128x64,            128x128
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#endif  // CONFIG_EXT_PARTITION
+  }, {  // 16X16 ->
+    //                                    4X4
+                                          PARTITION_SPLIT,
+    // 4X8,            8X4,               8X8
+    PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT,
+    // 8X16,           16X8,              16X16
+    PARTITION_VERT,    PARTITION_HORZ,    PARTITION_NONE,
+    // 16X32,          32X16,             32X32
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+    // 32X64,          64X32,             64X64
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#if CONFIG_EXT_PARTITION
+    // 64x128,         128x64,            128x128
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#endif  // CONFIG_EXT_PARTITION
+  }, {  // 32X32 ->
+    //                                    4X4
+                                          PARTITION_SPLIT,
+    // 4X8,            8X4,               8X8
+    PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT,
+    // 8X16,           16X8,              16X16
+    PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT,
+    // 16X32,          32X16,             32X32
+    PARTITION_VERT,    PARTITION_HORZ,    PARTITION_NONE,
+    // 32X64,          64X32,             64X64
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#if CONFIG_EXT_PARTITION
+    // 64x128,         128x64,            128x128
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#endif  // CONFIG_EXT_PARTITION
+  }, {  // 64X64 ->
+    //                                    4X4
+                                          PARTITION_SPLIT,
+    // 4X8,            8X4,               8X8
+    PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT,
+    // 8X16,           16X8,              16X16
+    PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT,
+    // 16X32,          32X16,             32X32
+    PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT,
+    // 32X64,          64X32,             64X64
+    PARTITION_VERT,    PARTITION_HORZ,    PARTITION_NONE,
+#if CONFIG_EXT_PARTITION
+    // 64x128,         128x64,            128x128
+    PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+  }, {  // 128x128 ->
+    //                                    4X4
+                                          PARTITION_SPLIT,
+    // 4X8,            8X4,               8X8
+    PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT,
+    // 8X16,           16X8,              16X16
+    PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT,
+    // 16X32,          32X16,             32X32
+    PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT,
+    // 32X64,          64X32,             64X64
+    PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT,
+    // 64x128,         128x64,            128x128
+    PARTITION_VERT,    PARTITION_HORZ,    PARTITION_NONE,
+#endif  // CONFIG_EXT_PARTITION
+  }
+};
+
+#if CONFIG_EXT_PARTITION_TYPES
+static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES] =
+#else
+static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] =
+#endif  // CONFIG_EXT_PARTITION_TYPES
+{
+  {     // PARTITION_NONE
+    //                            4X4
+                                  BLOCK_4X4,
+    // 4X8,        8X4,           8X8
+    BLOCK_4X8,     BLOCK_8X4,     BLOCK_8X8,
+    // 8X16,       16X8,          16X16
+    BLOCK_8X16,    BLOCK_16X8,    BLOCK_16X16,
+    // 16X32,      32X16,         32X32
+    BLOCK_16X32,   BLOCK_32X16,   BLOCK_32X32,
+    // 32X64,      64X32,         64X64
+    BLOCK_32X64,   BLOCK_64X32,   BLOCK_64X64,
+#if CONFIG_EXT_PARTITION
+    // 64x128,     128x64,        128x128
+    BLOCK_64X128,  BLOCK_128X64,  BLOCK_128X128,
+#endif  // CONFIG_EXT_PARTITION
+  }, {  // PARTITION_HORZ
+    //                            4X4
+                                  BLOCK_INVALID,
+    // 4X8,        8X4,           8X8
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
+    // 8X16,       16X8,          16X16
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
+    // 16X32,      32X16,         32X32
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
+    // 32X64,      64X32,         64X64
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
+#if CONFIG_EXT_PARTITION
+    // 64x128,     128x64,        128x128
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
+#endif  // CONFIG_EXT_PARTITION
+  }, {  // PARTITION_VERT
+    //                            4X4
+                                  BLOCK_INVALID,
+    // 4X8,        8X4,           8X8
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
+    // 8X16,       16X8,          16X16
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
+    // 16X32,      32X16,         32X32
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
+    // 32X64,      64X32,         64X64
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
+#if CONFIG_EXT_PARTITION
+    // 64x128,     128x64,        128x128
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
+#endif  // CONFIG_EXT_PARTITION
+  }, {  // PARTITION_SPLIT
+    //                            4X4
+                                  BLOCK_INVALID,
+    // 4X8,        8X4,           8X8
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4,
+    // 8X16,       16X8,          16X16
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8,
+    // 16X32,      32X16,         32X32
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16,
+    // 32X64,      64X32,         64X64
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32,
+#if CONFIG_EXT_PARTITION
+    // 64x128,     128x64,        128x128
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64,
+#endif  // CONFIG_EXT_PARTITION
+#if CONFIG_EXT_PARTITION_TYPES
+  }, {  // PARTITION_HORZ_A
+    //                            4X4
+                                  BLOCK_INVALID,
+    // 4X8,        8X4,           8X8
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
+    // 8X16,       16X8,          16X16
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
+    // 16X32,      32X16,         32X32
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
+    // 32X64,      64X32,         64X64
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
+#if CONFIG_EXT_PARTITION
+    // 64x128,     128x64,        128x128
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
+#endif  // CONFIG_EXT_PARTITION
+  }, {  // PARTITION_HORZ_B
+    //                            4X4
+                                  BLOCK_INVALID,
+    // 4X8,        8X4,           8X8
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
+    // 8X16,       16X8,          16X16
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
+    // 16X32,      32X16,         32X32
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
+    // 32X64,      64X32,         64X64
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
+#if CONFIG_EXT_PARTITION
+    // 64x128,     128x64,        128x128
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
+#endif  // CONFIG_EXT_PARTITION
+  }, {  // PARTITION_VERT_A
+    //                            4X4
+                                  BLOCK_INVALID,
+    // 4X8,        8X4,           8X8
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
+    // 8X16,       16X8,          16X16
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
+    // 16X32,      32X16,         32X32
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
+    // 32X64,      64X32,         64X64
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
+#if CONFIG_EXT_PARTITION
+    // 64x128,     128x64,        128x128
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
+#endif  // CONFIG_EXT_PARTITION
+  }, {  // PARTITION_VERT_B
+    //                            4X4
+                                  BLOCK_INVALID,
+    // 4X8,        8X4,           8X8
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
+    // 8X16,       16X8,          16X16
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
+    // 16X32,      32X16,         32X32
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
+    // 32X64,      64X32,         64X64
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
+#if CONFIG_EXT_PARTITION
+    // 64x128,     128x64,        128x128
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
+#endif  // CONFIG_EXT_PARTITION
+#endif  // CONFIG_EXT_PARTITION_TYPES
+  }
+};
+
+static const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = {
+  //                   4X4
+                       TX_4X4,
+  // 4X8,    8X4,      8X8
+  TX_4X4,    TX_4X4,   TX_8X8,
+  // 8X16,   16X8,     16X16
+  TX_8X8,    TX_8X8,   TX_16X16,
+  // 16X32,  32X16,    32X32
+  TX_16X16,  TX_16X16, TX_32X32,
+  // 32X64,  64X32,    64X64
+  TX_32X32,  TX_32X32, TX_32X32,
+#if CONFIG_EXT_PARTITION
+  // 64x128, 128x64,   128x128
+  TX_32X32,  TX_32X32, TX_32X32,
+#endif  // CONFIG_EXT_PARTITION
+};
+
+#if CONFIG_EXT_TX
+static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES] = {
+  //                   4X4
+                       TX_4X4,
+  // 4X8,    8X4,      8X8
+  TX_4X8,    TX_8X4,   TX_8X8,
+  // 8X16,   16X8,     16X16
+  TX_8X16,   TX_16X8,  TX_16X16,
+  // 16X32,  32X16,    32X32
+  TX_16X32,  TX_32X16, TX_32X32,
+  // 32X64,  64X32,    64X64
+  TX_32X32,  TX_32X32, TX_32X32,
+#if CONFIG_EXT_PARTITION
+  // 64x128, 128x64,   128x128
+  TX_32X32,  TX_32X32, TX_32X32,
+#endif  // CONFIG_EXT_PARTITION
+};
+#endif  // CONFIG_EXT_TX
+
+// Same as "max_txsize_lookup[bsize] - TX_8X8", invalid for bsize < 8X8
+static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES] = {
+  //                                      4X4
+                                          INT32_MIN,
+  // 4X8,             8X4,                8X8
+  INT32_MIN,          INT32_MIN,          TX_8X8 - TX_8X8,
+  // 8X16,            16X8,               16X16
+  TX_8X8 - TX_8X8,    TX_8X8 - TX_8X8,    TX_16X16 - TX_8X8,
+  // 16X32,           32X16,              32X32
+  TX_16X16 - TX_8X8,  TX_16X16 - TX_8X8,  TX_32X32 - TX_8X8,
+  // 32X64,           64X32,              64X64
+  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,
+#if CONFIG_EXT_PARTITION
+  // 64x128,          128x64,             128x128
+  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,
+#endif  // CONFIG_EXT_PARTITION
+};
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+// Same as "max_txsize_lookup[bsize] - TX_8X8", except for rectangular
+// blocks, which may use a rectangular transform, in which case it is
+// "(max_txsize_lookup[bsize] + 1) - TX_8X8"; invalid for bsize < 8X8
+static const int32_t inter_tx_size_cat_lookup[BLOCK_SIZES] = {
+  //                                      4X4
+                                          INT32_MIN,
+  // 4X8,             8X4,                8X8
+  INT32_MIN,          INT32_MIN,           TX_8X8 - TX_8X8,
+  // 8X16,            16X8,               16X16
+  TX_16X16 - TX_8X8,  TX_16X16 - TX_8X8,  TX_16X16 - TX_8X8,
+  // 16X32,           32X16,              32X32
+  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,
+  // 32X64,           64X32,              64X64
+  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,
+#if CONFIG_EXT_PARTITION
+  // 64x128,          128x64,             128x128
+  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,
+#endif  // CONFIG_EXT_PARTITION
+};
+#else
+#define inter_tx_size_cat_lookup intra_tx_size_cat_lookup
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
+
+/* clang-format on */
+
+static const TX_SIZE txsize_horz_map[TX_SIZES_ALL] = {
+  TX_4X4,    // TX_4X4
+  TX_8X8,    // TX_8X8
+  TX_16X16,  // TX_16X16
+  TX_32X32,  // TX_32X32
+#if CONFIG_EXT_TX
+  TX_4X4,    // TX_4X8
+  TX_8X8,    // TX_8X4
+  TX_8X8,    // TX_8X16
+  TX_16X16,  // TX_16X8
+  TX_16X16,  // TX_16X32
+  TX_32X32   // TX_32X16
+#endif       // CONFIG_EXT_TX
+};
+
+static const TX_SIZE txsize_vert_map[TX_SIZES_ALL] = {
+  TX_4X4,    // TX_4X4
+  TX_8X8,    // TX_8X8
+  TX_16X16,  // TX_16X16
+  TX_32X32,  // TX_32X32
+#if CONFIG_EXT_TX
+  TX_8X8,    // TX_4X8
+  TX_4X4,    // TX_8X4
+  TX_16X16,  // TX_8X16
+  TX_8X8,    // TX_16X8
+  TX_32X32,  // TX_16X32
+  TX_16X16   // TX_32X16
+#endif       // CONFIG_EXT_TX
+};
+
+static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
+  BLOCK_4X4,    // TX_4X4
+  BLOCK_8X8,    // TX_8X8
+  BLOCK_16X16,  // TX_16X16
+  BLOCK_32X32,  // TX_32X32
+#if CONFIG_EXT_TX
+  BLOCK_4X8,    // TX_4X8
+  BLOCK_8X4,    // TX_8X4
+  BLOCK_8X16,   // TX_8X16
+  BLOCK_16X8,   // TX_16X8
+  BLOCK_16X32,  // TX_16X32
+  BLOCK_32X16,  // TX_32X16
+#endif          // CONFIG_EXT_TX
+};
+
+static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
+  TX_4X4,    // TX_4X4
+  TX_8X8,    // TX_8X8
+  TX_16X16,  // TX_16X16
+  TX_32X32,  // TX_32X32
+#if CONFIG_EXT_TX
+  TX_4X4,    // TX_4X8
+  TX_4X4,    // TX_8X4
+  TX_8X8,    // TX_8X16
+  TX_8X8,    // TX_16X8
+  TX_16X16,  // TX_16X32
+  TX_16X16,  // TX_32X16
+#endif       // CONFIG_EXT_TX
+};
+
+static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
+  TX_4X4,    // TX_4X4
+  TX_8X8,    // TX_8X8
+  TX_16X16,  // TX_16X16
+  TX_32X32,  // TX_32X32
+#if CONFIG_EXT_TX
+  TX_8X8,    // TX_4X8
+  TX_8X8,    // TX_8X4
+  TX_16X16,  // TX_8X16
+  TX_16X16,  // TX_16X8
+  TX_32X32,  // TX_16X32
+  TX_32X32,  // TX_32X16
+#endif       // CONFIG_EXT_TX
+};
+
+static const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
+  TX_4X4,    // ONLY_4X4
+  TX_8X8,    // ALLOW_8X8
+  TX_16X16,  // ALLOW_16X16
+  TX_32X32,  // ALLOW_32X32
+  TX_32X32,  // TX_MODE_SELECT
+};
+
+static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = {
+  //  ss_x == 0    ss_x == 0        ss_x == 1      ss_x == 1
+  //  ss_y == 0    ss_y == 1        ss_y == 0      ss_y == 1
+  { { BLOCK_4X4, BLOCK_INVALID }, { BLOCK_INVALID, BLOCK_INVALID } },
+  { { BLOCK_4X8, BLOCK_4X4 }, { BLOCK_INVALID, BLOCK_INVALID } },
+  { { BLOCK_8X4, BLOCK_INVALID }, { BLOCK_4X4, BLOCK_INVALID } },
+  { { BLOCK_8X8, BLOCK_8X4 }, { BLOCK_4X8, BLOCK_4X4 } },
+  { { BLOCK_8X16, BLOCK_8X8 }, { BLOCK_INVALID, BLOCK_4X8 } },
+  { { BLOCK_16X8, BLOCK_INVALID }, { BLOCK_8X8, BLOCK_8X4 } },
+  { { BLOCK_16X16, BLOCK_16X8 }, { BLOCK_8X16, BLOCK_8X8 } },
+  { { BLOCK_16X32, BLOCK_16X16 }, { BLOCK_INVALID, BLOCK_8X16 } },
+  { { BLOCK_32X16, BLOCK_INVALID }, { BLOCK_16X16, BLOCK_16X8 } },
+  { { BLOCK_32X32, BLOCK_32X16 }, { BLOCK_16X32, BLOCK_16X16 } },
+  { { BLOCK_32X64, BLOCK_32X32 }, { BLOCK_INVALID, BLOCK_16X32 } },
+  { { BLOCK_64X32, BLOCK_INVALID }, { BLOCK_32X32, BLOCK_32X16 } },
+  { { BLOCK_64X64, BLOCK_64X32 }, { BLOCK_32X64, BLOCK_32X32 } },
+#if CONFIG_EXT_PARTITION
+  { { BLOCK_64X128, BLOCK_64X64 }, { BLOCK_INVALID, BLOCK_32X64 } },
+  { { BLOCK_128X64, BLOCK_INVALID }, { BLOCK_64X64, BLOCK_64X32 } },
+  { { BLOCK_128X128, BLOCK_128X64 }, { BLOCK_64X128, BLOCK_64X64 } },
+#endif  // CONFIG_EXT_PARTITION
+};
+
+// Generates a bit field (4 bits, 5 with CONFIG_EXT_PARTITION) in which each
+// bit set to 1 represents a block-size split: 1111 means we split the 64x64,
+// 32x32, 16x16 and 8x8 levels; 1000 means we split only the 64x64 into
+// 32x32.
+static const struct {
+  PARTITION_CONTEXT above;
+  PARTITION_CONTEXT left;
+} partition_context_lookup[BLOCK_SIZES] = {
+#if CONFIG_EXT_PARTITION
+  { 31, 31 },  // 4X4   - {0b11111, 0b11111}
+  { 31, 30 },  // 4X8   - {0b11111, 0b11110}
+  { 30, 31 },  // 8X4   - {0b11110, 0b11111}
+  { 30, 30 },  // 8X8   - {0b11110, 0b11110}
+  { 30, 28 },  // 8X16  - {0b11110, 0b11100}
+  { 28, 30 },  // 16X8  - {0b11100, 0b11110}
+  { 28, 28 },  // 16X16 - {0b11100, 0b11100}
+  { 28, 24 },  // 16X32 - {0b11100, 0b11000}
+  { 24, 28 },  // 32X16 - {0b11000, 0b11100}
+  { 24, 24 },  // 32X32 - {0b11000, 0b11000}
+  { 24, 16 },  // 32X64 - {0b11000, 0b10000}
+  { 16, 24 },  // 64X32 - {0b10000, 0b11000}
+  { 16, 16 },  // 64X64 - {0b10000, 0b10000}
+  { 16, 0 },   // 64X128- {0b10000, 0b00000}
+  { 0, 16 },   // 128X64- {0b00000, 0b10000}
+  { 0, 0 },    // 128X128-{0b00000, 0b00000}
+#else
+  { 15, 15 },  // 4X4   - {0b1111, 0b1111}
+  { 15, 14 },  // 4X8   - {0b1111, 0b1110}
+  { 14, 15 },  // 8X4   - {0b1110, 0b1111}
+  { 14, 14 },  // 8X8   - {0b1110, 0b1110}
+  { 14, 12 },  // 8X16  - {0b1110, 0b1100}
+  { 12, 14 },  // 16X8  - {0b1100, 0b1110}
+  { 12, 12 },  // 16X16 - {0b1100, 0b1100}
+  { 12, 8 },   // 16X32 - {0b1100, 0b1000}
+  { 8, 12 },   // 32X16 - {0b1000, 0b1100}
+  { 8, 8 },    // 32X32 - {0b1000, 0b1000}
+  { 8, 0 },    // 32X64 - {0b1000, 0b0000}
+  { 0, 8 },    // 64X32 - {0b0000, 0b1000}
+  { 0, 0 },    // 64X64 - {0b0000, 0b0000}
+#endif  // CONFIG_EXT_PARTITION
+};
+
+#if CONFIG_SUPERTX
+static const TX_SIZE uvsupertx_size_lookup[TX_SIZES][2][2] = {
+  //  ss_x == 0 ss_x == 0   ss_x == 1 ss_x == 1
+  //  ss_y == 0 ss_y == 1   ss_y == 0 ss_y == 1
+  { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+  { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
+  { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
+  { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } },
+};
+
+#if CONFIG_EXT_PARTITION_TYPES
+static const int partition_supertx_context_lookup[EXT_PARTITION_TYPES] = {
+  -1, 0, 0, 1, 0, 0, 0, 0
+};
+
+#else
+static const int partition_supertx_context_lookup[PARTITION_TYPES] = { -1, 0, 0,
+                                                                       1 };
+#endif  // CONFIG_EXT_PARTITION_TYPES
+#endif  // CONFIG_SUPERTX
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_COMMON_DATA_H_
diff --git a/av1/common/debugmodes.c b/av1/common/debugmodes.c
new file mode 100644
index 0000000..6c958a8
--- /dev/null
+++ b/av1/common/debugmodes.c
@@ -0,0 +1,89 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+
+#include "av1/common/blockd.h"
+#include "av1/common/onyxc_int.h"
+
+static void log_frame_info(VP10_COMMON *cm, const char *str, FILE *f) {
+  fprintf(f, "%s", str);
+  fprintf(f, "(Frame %d, Show:%d, Q:%d): \n", cm->current_video_frame,
+          cm->show_frame, cm->base_qindex);
+}
+/* This function dereferences a pointer to the mbmi structure
+ * and uses the passed-in member offset to print the value of an integer
+ * member of the mbmi for every mi unit in the grid.
+ */
+static void print_mi_data(VP10_COMMON *cm, FILE *file, const char *descriptor,
+                          size_t member_offset) {
+  int mi_row, mi_col;
+  MODE_INFO **mi = cm->mi_grid_visible;
+  int rows = cm->mi_rows;
+  int cols = cm->mi_cols;
+  char prefix = descriptor[0];
+
+  log_frame_info(cm, descriptor, file);
+  for (mi_row = 0; mi_row < rows; mi_row++) {
+    fprintf(file, "%c ", prefix);
+    for (mi_col = 0; mi_col < cols; mi_col++) {
+      fprintf(file, "%2d ", *((int *)((char *)(&mi[0]->mbmi) + member_offset)));
+      mi++;
+    }
+    fprintf(file, "\n");
+    mi += 8;
+  }
+  fprintf(file, "\n");
+}
+
+void vp10_print_modes_and_motion_vectors(VP10_COMMON *cm, const char *file) {
+  int mi_row;
+  int mi_col;
+  FILE *mvs = fopen(file, "a");
+  MODE_INFO **mi = cm->mi_grid_visible;
+  int rows = cm->mi_rows;
+  int cols = cm->mi_cols;
+
+  print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type));
+  print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode));
+  print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0]));
+  print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
+  print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
+
+  // Output skip information.
+  log_frame_info(cm, "Skips:", mvs);
+  for (mi_row = 0; mi_row < rows; mi_row++) {
+    fprintf(mvs, "S ");
+    for (mi_col = 0; mi_col < cols; mi_col++) {
+      fprintf(mvs, "%2d ", mi[0]->mbmi.skip);
+      mi++;
+    }
+    fprintf(mvs, "\n");
+    mi += 8;
+  }
+  fprintf(mvs, "\n");
+
+  // Output motion vectors.
+  log_frame_info(cm, "Vectors ", mvs);
+  mi = cm->mi_grid_visible;
+  for (mi_row = 0; mi_row < rows; mi_row++) {
+    fprintf(mvs, "V ");
+    for (mi_col = 0; mi_col < cols; mi_col++) {
+      fprintf(mvs, "%4d:%4d ", mi[0]->mbmi.mv[0].as_mv.row,
+              mi[0]->mbmi.mv[0].as_mv.col);
+      mi++;
+    }
+    fprintf(mvs, "\n");
+    mi += 8;
+  }
+  fprintf(mvs, "\n");
+
+  fclose(mvs);
+}
diff --git a/av1/common/dering.c b/av1/common/dering.c
new file mode 100644
index 0000000..7c116a2
--- /dev/null
+++ b/av1/common/dering.c
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string.h>
+#include <math.h>
+
+#include "./vpx_scale_rtcd.h"
+#include "aom/vpx_integer.h"
+#include "av1/common/dering.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/od_dering.h"
+
+int compute_level_from_index(int global_level, int gi) {
+  static const int dering_gains[DERING_REFINEMENT_LEVELS] = { 0, 11, 16, 22 };
+  int level;
+  if (global_level == 0) return 0;
+  level = (global_level * dering_gains[gi] + 8) >> 4;
+  return clamp(level, gi, MAX_DERING_LEVEL - 1);
+}
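+
+// Example (using the gains above): with global_level == 32, gi == 1 gives
+// level = (32 * 11 + 8) >> 4 == 22 and gi == 3 gives
+// level = (32 * 22 + 8) >> 4 == 44; both already lie in
+// [gi, MAX_DERING_LEVEL - 1], so the clamp leaves them unchanged.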
+
+int sb_all_skip(const VP10_COMMON *const cm, int mi_row, int mi_col) {
+  int r, c;
+  int maxc, maxr;
+  int skip = 1;
+  maxc = cm->mi_cols - mi_col;
+  maxr = cm->mi_rows - mi_row;
+  if (maxr > MAX_MIB_SIZE) maxr = MAX_MIB_SIZE;
+  if (maxc > MAX_MIB_SIZE) maxc = MAX_MIB_SIZE;
+  for (r = 0; r < maxr; r++) {
+    for (c = 0; c < maxc; c++) {
+      skip = skip &&
+             cm->mi_grid_visible[(mi_row + r) * cm->mi_stride + mi_col + c]
+                 ->mbmi.skip;
+    }
+  }
+  return skip;
+}
+
+void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                       MACROBLOCKD *xd, int global_level) {
+  int r, c;
+  int sbr, sbc;
+  int nhsb, nvsb;
+  od_dering_in *src[3];
+  unsigned char *bskip;
+  int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
+  int stride;
+  int bsize[3];
+  int dec[3];
+  int pli;
+  int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);
+  nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
+  nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
+  bskip = vpx_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
+  vp10_setup_dst_planes(xd->plane, frame, 0, 0);
+  for (pli = 0; pli < 3; pli++) {
+    dec[pli] = xd->plane[pli].subsampling_x;
+    bsize[pli] = 8 >> dec[pli];
+  }
+  stride = bsize[0] * cm->mi_cols;
+  for (pli = 0; pli < 3; pli++) {
+    src[pli] = vpx_malloc(sizeof(*src[pli]) * cm->mi_rows * cm->mi_cols * 64);
+    for (r = 0; r < bsize[pli] * cm->mi_rows; ++r) {
+      for (c = 0; c < bsize[pli] * cm->mi_cols; ++c) {
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (cm->use_highbitdepth) {
+          src[pli][r * stride + c] = CONVERT_TO_SHORTPTR(
+              xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
+        } else {
+#endif
+          src[pli][r * stride + c] =
+              xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
+#if CONFIG_VP9_HIGHBITDEPTH
+        }
+#endif
+      }
+    }
+  }
+  for (r = 0; r < cm->mi_rows; ++r) {
+    for (c = 0; c < cm->mi_cols; ++c) {
+      const MB_MODE_INFO *mbmi =
+          &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
+      bskip[r * cm->mi_cols + c] = mbmi->skip;
+    }
+  }
+  for (sbr = 0; sbr < nvsb; sbr++) {
+    for (sbc = 0; sbc < nhsb; sbc++) {
+      int level;
+      int nhb, nvb;
+      nhb = VPXMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
+      nvb = VPXMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
+      for (pli = 0; pli < 3; pli++) {
+        int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
+        int threshold;
+#if DERING_REFINEMENT
+        level = compute_level_from_index(
+            global_level,
+            cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
+                                MAX_MIB_SIZE * sbc]
+                ->mbmi.dering_gain);
+#else
+        level = global_level;
+#endif
+        /* FIXME: This is a temporary hack that uses more conservative
+           deringing for chroma. */
+        if (pli) level = (level * 5 + 4) >> 3;
+        if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) level = 0;
+        threshold = level << coeff_shift;
+        od_dering(&OD_DERING_VTBL_C, dst, MAX_MIB_SIZE * bsize[pli],
+                  &src[pli][sbr * stride * bsize[pli] * MAX_MIB_SIZE +
+                            sbc * bsize[pli] * MAX_MIB_SIZE],
+                  stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
+                  &bskip[MAX_MIB_SIZE * sbr * cm->mi_cols + MAX_MIB_SIZE * sbc],
+                  cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP,
+                  coeff_shift);
+        for (r = 0; r < bsize[pli] * nvb; ++r) {
+          for (c = 0; c < bsize[pli] * nhb; ++c) {
+#if CONFIG_VP9_HIGHBITDEPTH
+            if (cm->use_highbitdepth) {
+              CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
+              [xd->plane[pli].dst.stride *
+                   (bsize[pli] * MAX_MIB_SIZE * sbr + r) +
+               sbc * bsize[pli] * MAX_MIB_SIZE + c] =
+                  dst[r * MAX_MIB_SIZE * bsize[pli] + c];
+            } else {
+#endif
+              xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
+                                         (bsize[pli] * MAX_MIB_SIZE * sbr + r) +
+                                     sbc * bsize[pli] * MAX_MIB_SIZE + c] =
+                  dst[r * MAX_MIB_SIZE * bsize[pli] + c];
+#if CONFIG_VP9_HIGHBITDEPTH
+            }
+#endif
+          }
+        }
+      }
+    }
+  }
+  for (pli = 0; pli < 3; pli++) {
+    vpx_free(src[pli]);
+  }
+  vpx_free(bskip);
+}
diff --git a/av1/common/dering.h b/av1/common/dering.h
new file mode 100644
index 0000000..de59c86
--- /dev/null
+++ b/av1/common/dering.h
@@ -0,0 +1,32 @@
+#ifndef VP10_COMMON_DERING_H_
+#define VP10_COMMON_DERING_H_
+
+#include "av1/common/od_dering.h"
+#include "av1/common/onyxc_int.h"
+#include "aom/vpx_integer.h"
+#include "./vpx_config.h"
+#include "aom_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DERING_LEVEL_BITS 6
+#define MAX_DERING_LEVEL (1 << DERING_LEVEL_BITS)
+
+#define DERING_REFINEMENT 1
+#define DERING_REFINEMENT_BITS 2
+#define DERING_REFINEMENT_LEVELS 4
+
+int compute_level_from_index(int global_level, int gi);
+int sb_all_skip(const VP10_COMMON *const cm, int mi_row, int mi_col);
+void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                       MACROBLOCKD *xd, int global_level);
+
+int vp10_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
+                       VP10_COMMON *cm, MACROBLOCKD *xd);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+#endif  // VP10_COMMON_DERING_H_
diff --git a/av1/common/divide.c b/av1/common/divide.c
new file mode 100644
index 0000000..f0c6730
--- /dev/null
+++ b/av1/common/divide.c
@@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/divide.h"
+
+/* Constants for divide-by-multiply for small divisors, generated with:
+void init_fastdiv() {
+  int i;
+  for (i = 3; i < 256; ++i) {
+    const int s = 31 ^ __builtin_clz(2 * i + 1);
+    const unsigned long long base = (1ull << (sizeof(unsigned) * 8 + s)) - 1;
+    fastdiv_tab[i].mult = (base / i + 1) & 0xFFFFFFFF;
+    fastdiv_tab[i].shift = s;
+  }
+  for (i = 0; i < 8; ++i) {
+    fastdiv_tab[1 << i].mult = 0;
+    fastdiv_tab[1 << i].shift = i;
+  }
+}
+*/
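+// For example, for divisor 3 the generator above computes
+// s = 31 ^ __builtin_clz(2 * 3 + 1) = 2 and
+// mult = (((1ull << 34) - 1) / 3 + 1) & 0xFFFFFFFF = 1431655766,
+// matching the { 1431655766, 2 } entry below.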
+const struct fastdiv_elem vp10_fastdiv_tab[256] = {
+  { 0, 0 },           { 0, 0 },           { 0, 1 },
+  { 1431655766, 2 },  { 0, 2 },           { 2576980378u, 3 },
+  { 1431655766, 3 },  { 613566757, 3 },   { 0, 3 },
+  { 3340530120u, 4 }, { 2576980378u, 4 }, { 1952257862, 4 },
+  { 1431655766, 4 },  { 991146300, 4 },   { 613566757, 4 },
+  { 286331154u, 4 },  { 0, 4 },           { 3789677026u, 5 },
+  { 3340530120u, 5 }, { 2938661835u, 5 }, { 2576980378u, 5 },
+  { 2249744775u, 5 }, { 1952257862, 5 },  { 1680639377, 5 },
+  { 1431655766, 5 },  { 1202590843, 5 },  { 991146300, 5 },
+  { 795364315, 5 },   { 613566757, 5 },   { 444306962, 5 },
+  { 286331154, 5 },   { 138547333, 5 },   { 0, 5 },
+  { 4034666248u, 6 }, { 3789677026u, 6 }, { 3558687189u, 6 },
+  { 3340530120u, 6 }, { 3134165325u, 6 }, { 2938661835u, 6 },
+  { 2753184165u, 6 }, { 2576980378u, 6 }, { 2409371898u, 6 },
+  { 2249744775u, 6 }, { 2097542168u, 6 }, { 1952257862, 6 },
+  { 1813430637, 6 },  { 1680639377, 6 },  { 1553498810, 6 },
+  { 1431655766, 6 },  { 1314785907, 6 },  { 1202590843, 6 },
+  { 1094795586, 6 },  { 991146300, 6 },   { 891408307, 6 },
+  { 795364315, 6 },   { 702812831, 6 },   { 613566757, 6 },
+  { 527452125, 6 },   { 444306962, 6 },   { 363980280, 6 },
+  { 286331154, 6 },   { 211227900, 6 },   { 138547333, 6 },
+  { 68174085, 6 },    { 0, 6 },           { 4162814457u, 7 },
+  { 4034666248u, 7 }, { 3910343360u, 7 }, { 3789677026u, 7 },
+  { 3672508268u, 7 }, { 3558687189u, 7 }, { 3448072337u, 7 },
+  { 3340530120u, 7 }, { 3235934265u, 7 }, { 3134165325u, 7 },
+  { 3035110223u, 7 }, { 2938661835u, 7 }, { 2844718599u, 7 },
+  { 2753184165u, 7 }, { 2663967058u, 7 }, { 2576980378u, 7 },
+  { 2492141518u, 7 }, { 2409371898u, 7 }, { 2328596727u, 7 },
+  { 2249744775u, 7 }, { 2172748162u, 7 }, { 2097542168, 7 },
+  { 2024065048, 7 },  { 1952257862, 7 },  { 1882064321, 7 },
+  { 1813430637, 7 },  { 1746305385, 7 },  { 1680639377, 7 },
+  { 1616385542, 7 },  { 1553498810, 7 },  { 1491936009, 7 },
+  { 1431655766, 7 },  { 1372618415, 7 },  { 1314785907, 7 },
+  { 1258121734, 7 },  { 1202590843, 7 },  { 1148159575, 7 },
+  { 1094795586, 7 },  { 1042467791, 7 },  { 991146300, 7 },
+  { 940802361, 7 },   { 891408307, 7 },   { 842937507, 7 },
+  { 795364315, 7 },   { 748664025, 7 },   { 702812831, 7 },
+  { 657787785, 7 },   { 613566757, 7 },   { 570128403, 7 },
+  { 527452125, 7 },   { 485518043, 7 },   { 444306962, 7 },
+  { 403800345, 7 },   { 363980280, 7 },   { 324829460, 7 },
+  { 286331154, 7 },   { 248469183, 7 },   { 211227900, 7 },
+  { 174592167, 7 },   { 138547333, 7 },   { 103079216, 7 },
+  { 68174085, 7 },    { 33818641, 7 },    { 0, 7 },
+  { 4228378656u, 8 }, { 4162814457u, 8 }, { 4098251237u, 8 },
+  { 4034666248u, 8 }, { 3972037425u, 8 }, { 3910343360u, 8 },
+  { 3849563281u, 8 }, { 3789677026u, 8 }, { 3730665024u, 8 },
+  { 3672508268u, 8 }, { 3615188300u, 8 }, { 3558687189u, 8 },
+  { 3502987511u, 8 }, { 3448072337u, 8 }, { 3393925206u, 8 },
+  { 3340530120u, 8 }, { 3287871517u, 8 }, { 3235934265u, 8 },
+  { 3184703642u, 8 }, { 3134165325u, 8 }, { 3084305374u, 8 },
+  { 3035110223u, 8 }, { 2986566663u, 8 }, { 2938661835u, 8 },
+  { 2891383213u, 8 }, { 2844718599u, 8 }, { 2798656110u, 8 },
+  { 2753184165u, 8 }, { 2708291480u, 8 }, { 2663967058u, 8 },
+  { 2620200175u, 8 }, { 2576980378u, 8 }, { 2534297473u, 8 },
+  { 2492141518u, 8 }, { 2450502814u, 8 }, { 2409371898u, 8 },
+  { 2368739540u, 8 }, { 2328596727u, 8 }, { 2288934667u, 8 },
+  { 2249744775u, 8 }, { 2211018668u, 8 }, { 2172748162u, 8 },
+  { 2134925265u, 8 }, { 2097542168, 8 },  { 2060591247, 8 },
+  { 2024065048, 8 },  { 1987956292, 8 },  { 1952257862, 8 },
+  { 1916962805, 8 },  { 1882064321, 8 },  { 1847555765, 8 },
+  { 1813430637, 8 },  { 1779682582, 8 },  { 1746305385, 8 },
+  { 1713292966, 8 },  { 1680639377, 8 },  { 1648338801, 8 },
+  { 1616385542, 8 },  { 1584774030, 8 },  { 1553498810, 8 },
+  { 1522554545, 8 },  { 1491936009, 8 },  { 1461638086, 8 },
+  { 1431655766, 8 },  { 1401984144, 8 },  { 1372618415, 8 },
+  { 1343553873, 8 },  { 1314785907, 8 },  { 1286310003, 8 },
+  { 1258121734, 8 },  { 1230216764, 8 },  { 1202590843, 8 },
+  { 1175239808, 8 },  { 1148159575, 8 },  { 1121346142, 8 },
+  { 1094795586, 8 },  { 1068504060, 8 },  { 1042467791, 8 },
+  { 1016683080, 8 },  { 991146300, 8 },   { 965853890, 8 },
+  { 940802361, 8 },   { 915988286, 8 },   { 891408307, 8 },
+  { 867059126, 8 },   { 842937507, 8 },   { 819040276, 8 },
+  { 795364315, 8 },   { 771906565, 8 },   { 748664025, 8 },
+  { 725633745, 8 },   { 702812831, 8 },   { 680198441, 8 },
+  { 657787785, 8 },   { 635578121, 8 },   { 613566757, 8 },
+  { 591751050, 8 },   { 570128403, 8 },   { 548696263, 8 },
+  { 527452125, 8 },   { 506393524, 8 },   { 485518043, 8 },
+  { 464823301, 8 },   { 444306962, 8 },   { 423966729, 8 },
+  { 403800345, 8 },   { 383805589, 8 },   { 363980280, 8 },
+  { 344322273, 8 },   { 324829460, 8 },   { 305499766, 8 },
+  { 286331154, 8 },   { 267321616, 8 },   { 248469183, 8 },
+  { 229771913, 8 },   { 211227900, 8 },   { 192835267, 8 },
+  { 174592167, 8 },   { 156496785, 8 },   { 138547333, 8 },
+  { 120742053, 8 },   { 103079216, 8 },   { 85557118, 8 },
+  { 68174085, 8 },    { 50928466, 8 },    { 33818641, 8 },
+  { 16843010, 8 },
+};
diff --git a/av1/common/divide.h b/av1/common/divide.h
new file mode 100644
index 0000000..7de6c91
--- /dev/null
+++ b/av1/common/divide.h
@@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_DIVIDE_H_
+#define VP10_COMMON_DIVIDE_H_
+// An implementation of the divide-by-multiply algorithm:
+// https://gmplib.org/~tege/divcnst-pldi94.pdf
+
+#include <limits.h>
+
+#include "./vpx_config.h"
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+struct fastdiv_elem {
+  unsigned mult;
+  unsigned shift;
+};
+
+extern const struct fastdiv_elem vp10_fastdiv_tab[256];
+
+static INLINE unsigned fastdiv(unsigned x, int y) {
+  unsigned t =
+      ((uint64_t)x * vp10_fastdiv_tab[y].mult) >> (sizeof(x) * CHAR_BIT);
+  return (t + x) >> vp10_fastdiv_tab[y].shift;
+}
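+
+// Example: fastdiv(100, 7) looks up { 613566757, 3 }:
+// t = ((uint64_t)100 * 613566757) >> 32 == 14, and (14 + 100) >> 3 == 14,
+// which equals 100 / 7.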
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // VP10_COMMON_DIVIDE_H_
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
new file mode 100644
index 0000000..be96c42
--- /dev/null
+++ b/av1/common/entropy.c
@@ -0,0 +1,2922 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/entropy.h"
+#include "av1/common/blockd.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/entropymode.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom/vpx_integer.h"
+
+// Unconstrained Node Tree
+/* clang-format off */
+const vpx_tree_index vp10_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
+  2, 6,                                // 0 = LOW_VAL
+  -TWO_TOKEN, 4,                       // 1 = TWO
+  -THREE_TOKEN, -FOUR_TOKEN,           // 2 = THREE
+  8, 10,                               // 3 = HIGH_LOW
+  -CATEGORY1_TOKEN, -CATEGORY2_TOKEN,  // 4 = CAT_ONE
+  12, 14,                              // 5 = CAT_THREEFOUR
+  -CATEGORY3_TOKEN, -CATEGORY4_TOKEN,  // 6 = CAT_THREE
+  -CATEGORY5_TOKEN, -CATEGORY6_TOKEN   // 7 = CAT_FIVE
+};
+/* clang-format on */
+
+const vpx_prob vp10_cat1_prob[] = { 159 };
+const vpx_prob vp10_cat2_prob[] = { 165, 145 };
+const vpx_prob vp10_cat3_prob[] = { 173, 148, 140 };
+const vpx_prob vp10_cat4_prob[] = { 176, 155, 140, 135 };
+const vpx_prob vp10_cat5_prob[] = { 180, 157, 141, 134, 130 };
+const vpx_prob vp10_cat6_prob[] = { 254, 254, 254, 252, 249, 243, 230,
+                                    196, 177, 153, 140, 133, 130, 129 };
+#if CONFIG_VP9_HIGHBITDEPTH
+const vpx_prob vp10_cat1_prob_high10[] = { 159 };
+const vpx_prob vp10_cat2_prob_high10[] = { 165, 145 };
+const vpx_prob vp10_cat3_prob_high10[] = { 173, 148, 140 };
+const vpx_prob vp10_cat4_prob_high10[] = { 176, 155, 140, 135 };
+const vpx_prob vp10_cat5_prob_high10[] = { 180, 157, 141, 134, 130 };
+const vpx_prob vp10_cat6_prob_high10[] = {
+  255, 255, 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
+};
+const vpx_prob vp10_cat1_prob_high12[] = { 159 };
+const vpx_prob vp10_cat2_prob_high12[] = { 165, 145 };
+const vpx_prob vp10_cat3_prob_high12[] = { 173, 148, 140 };
+const vpx_prob vp10_cat4_prob_high12[] = { 176, 155, 140, 135 };
+const vpx_prob vp10_cat5_prob_high12[] = { 180, 157, 141, 134, 130 };
+const vpx_prob vp10_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254,
+                                           254, 252, 249, 243, 230, 196,
+                                           177, 153, 140, 133, 130, 129 };
+#endif
+
+const uint16_t band_count_table[TX_SIZES_ALL][8] = {
+  { 1, 2, 3, 4, 3, 16 - 13, 0 },   { 1, 2, 3, 4, 11, 64 - 21, 0 },
+  { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 },
+#if CONFIG_EXT_TX
+  { 1, 2, 3, 4, 8, 32 - 18, 0 },   { 1, 2, 3, 4, 8, 32 - 18, 0 },
+  { 1, 2, 3, 4, 11, 128 - 21, 0 }, { 1, 2, 3, 4, 11, 128 - 21, 0 },
+  { 1, 2, 3, 4, 11, 512 - 21, 0 }, { 1, 2, 3, 4, 11, 512 - 21, 0 },
+#endif  // CONFIG_EXT_TX
+};
+
+const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
+  { 0, 1, 3, 6, 10, 13, 16, 0 },  { 0, 1, 3, 6, 10, 21, 64, 0 },
+  { 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 1024, 0 },
+#if CONFIG_EXT_TX
+  { 0, 1, 3, 6, 10, 18, 32, 0 },  { 0, 1, 3, 6, 10, 18, 32, 0 },
+  { 0, 1, 3, 6, 10, 21, 128, 0 }, { 0, 1, 3, 6, 10, 21, 128, 0 },
+  { 0, 1, 3, 6, 10, 21, 512, 0 }, { 0, 1, 3, 6, 10, 21, 512, 0 },
+#endif  // CONFIG_EXT_TX
+};
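+
+// Note: each row of band_cum_count_table above is the running prefix sum of
+// the corresponding band_count_table row; e.g. for TX_4X4 the counts
+// { 1, 2, 3, 4, 3, 3 } accumulate to { 0, 1, 3, 6, 10, 13, 16 }.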
+
+const uint8_t vp10_coefband_trans_8x8plus[1024] = {
+  0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
+  // beyond MAXBAND_INDEX + 1, all values are filled with 5
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+};
+
+#if CONFIG_EXT_TX
+const uint8_t vp10_coefband_trans_4x8_8x4[32] = {
+  0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
+  4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+};
+#endif  // CONFIG_EXT_TX
+
+const uint8_t vp10_coefband_trans_4x4[16] = {
+  0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
+};
+
+const uint8_t vp10_pt_energy_class[ENTROPY_TOKENS] = { 0, 1, 2, 3, 3, 4,
+                                                       4, 5, 5, 5, 5, 5 };
+
+// Model obtained from a 2-sided zero-centered distribution derived
+// from a Pareto distribution. The cdf of the distribution is:
+// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
+//
+// For a given beta and a given probability of the 1-node, the alpha
+// is first solved, and then the {alpha, beta} pair is used to generate
+// the probabilities for the rest of the nodes.
+
+// beta = 8
+
+// Every odd line in this table can be generated from the even lines
+// by averaging:
+// vp10_pareto8_full[l][node] = (vp10_pareto8_full[l-1][node] +
+//                              vp10_pareto8_full[l+1][node] ) >> 1;
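+// E.g. the second line below, { 6, 86, 128, 11, 87, 42, 91, 52 }, is the
+// element-wise average of its neighbours { 3, 86, 128, 6, 86, 23, 88, 29 }
+// and { 9, 86, 129, 17, 88, 61, 94, 76 }.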
+const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = {
+  { 3, 86, 128, 6, 86, 23, 88, 29 },
+  { 6, 86, 128, 11, 87, 42, 91, 52 },
+  { 9, 86, 129, 17, 88, 61, 94, 76 },
+  { 12, 86, 129, 22, 88, 77, 97, 93 },
+  { 15, 87, 129, 28, 89, 93, 100, 110 },
+  { 17, 87, 129, 33, 90, 105, 103, 123 },
+  { 20, 88, 130, 38, 91, 118, 106, 136 },
+  { 23, 88, 130, 43, 91, 128, 108, 146 },
+  { 26, 89, 131, 48, 92, 139, 111, 156 },
+  { 28, 89, 131, 53, 93, 147, 114, 163 },
+  { 31, 90, 131, 58, 94, 156, 117, 171 },
+  { 34, 90, 131, 62, 94, 163, 119, 177 },
+  { 37, 90, 132, 66, 95, 171, 122, 184 },
+  { 39, 90, 132, 70, 96, 177, 124, 189 },
+  { 42, 91, 132, 75, 97, 183, 127, 194 },
+  { 44, 91, 132, 79, 97, 188, 129, 198 },
+  { 47, 92, 133, 83, 98, 193, 132, 202 },
+  { 49, 92, 133, 86, 99, 197, 134, 205 },
+  { 52, 93, 133, 90, 100, 201, 137, 208 },
+  { 54, 93, 133, 94, 100, 204, 139, 211 },
+  { 57, 94, 134, 98, 101, 208, 142, 214 },
+  { 59, 94, 134, 101, 102, 211, 144, 216 },
+  { 62, 94, 135, 105, 103, 214, 146, 218 },
+  { 64, 94, 135, 108, 103, 216, 148, 220 },
+  { 66, 95, 135, 111, 104, 219, 151, 222 },
+  { 68, 95, 135, 114, 105, 221, 153, 223 },
+  { 71, 96, 136, 117, 106, 224, 155, 225 },
+  { 73, 96, 136, 120, 106, 225, 157, 226 },
+  { 76, 97, 136, 123, 107, 227, 159, 228 },
+  { 78, 97, 136, 126, 108, 229, 160, 229 },
+  { 80, 98, 137, 129, 109, 231, 162, 231 },
+  { 82, 98, 137, 131, 109, 232, 164, 232 },
+  { 84, 98, 138, 134, 110, 234, 166, 233 },
+  { 86, 98, 138, 137, 111, 235, 168, 234 },
+  { 89, 99, 138, 140, 112, 236, 170, 235 },
+  { 91, 99, 138, 142, 112, 237, 171, 235 },
+  { 93, 100, 139, 145, 113, 238, 173, 236 },
+  { 95, 100, 139, 147, 114, 239, 174, 237 },
+  { 97, 101, 140, 149, 115, 240, 176, 238 },
+  { 99, 101, 140, 151, 115, 241, 177, 238 },
+  { 101, 102, 140, 154, 116, 242, 179, 239 },
+  { 103, 102, 140, 156, 117, 242, 180, 239 },
+  { 105, 103, 141, 158, 118, 243, 182, 240 },
+  { 107, 103, 141, 160, 118, 243, 183, 240 },
+  { 109, 104, 141, 162, 119, 244, 185, 241 },
+  { 111, 104, 141, 164, 119, 244, 186, 241 },
+  { 113, 104, 142, 166, 120, 245, 187, 242 },
+  { 114, 104, 142, 168, 121, 245, 188, 242 },
+  { 116, 105, 143, 170, 122, 246, 190, 243 },
+  { 118, 105, 143, 171, 122, 246, 191, 243 },
+  { 120, 106, 143, 173, 123, 247, 192, 244 },
+  { 121, 106, 143, 175, 124, 247, 193, 244 },
+  { 123, 107, 144, 177, 125, 248, 195, 244 },
+  { 125, 107, 144, 178, 125, 248, 196, 244 },
+  { 127, 108, 145, 180, 126, 249, 197, 245 },
+  { 128, 108, 145, 181, 127, 249, 198, 245 },
+  { 130, 109, 145, 183, 128, 249, 199, 245 },
+  { 132, 109, 145, 184, 128, 249, 200, 245 },
+  { 134, 110, 146, 186, 129, 250, 201, 246 },
+  { 135, 110, 146, 187, 130, 250, 202, 246 },
+  { 137, 111, 147, 189, 131, 251, 203, 246 },
+  { 138, 111, 147, 190, 131, 251, 204, 246 },
+  { 140, 112, 147, 192, 132, 251, 205, 247 },
+  { 141, 112, 147, 193, 132, 251, 206, 247 },
+  { 143, 113, 148, 194, 133, 251, 207, 247 },
+  { 144, 113, 148, 195, 134, 251, 207, 247 },
+  { 146, 114, 149, 197, 135, 252, 208, 248 },
+  { 147, 114, 149, 198, 135, 252, 209, 248 },
+  { 149, 115, 149, 199, 136, 252, 210, 248 },
+  { 150, 115, 149, 200, 137, 252, 210, 248 },
+  { 152, 115, 150, 201, 138, 252, 211, 248 },
+  { 153, 115, 150, 202, 138, 252, 212, 248 },
+  { 155, 116, 151, 204, 139, 253, 213, 249 },
+  { 156, 116, 151, 205, 139, 253, 213, 249 },
+  { 158, 117, 151, 206, 140, 253, 214, 249 },
+  { 159, 117, 151, 207, 141, 253, 215, 249 },
+  { 161, 118, 152, 208, 142, 253, 216, 249 },
+  { 162, 118, 152, 209, 142, 253, 216, 249 },
+  { 163, 119, 153, 210, 143, 253, 217, 249 },
+  { 164, 119, 153, 211, 143, 253, 217, 249 },
+  { 166, 120, 153, 212, 144, 254, 218, 250 },
+  { 167, 120, 153, 212, 145, 254, 219, 250 },
+  { 168, 121, 154, 213, 146, 254, 220, 250 },
+  { 169, 121, 154, 214, 146, 254, 220, 250 },
+  { 171, 122, 155, 215, 147, 254, 221, 250 },
+  { 172, 122, 155, 216, 147, 254, 221, 250 },
+  { 173, 123, 155, 217, 148, 254, 222, 250 },
+  { 174, 123, 155, 217, 149, 254, 222, 250 },
+  { 176, 124, 156, 218, 150, 254, 223, 250 },
+  { 177, 124, 156, 219, 150, 254, 223, 250 },
+  { 178, 125, 157, 220, 151, 254, 224, 251 },
+  { 179, 125, 157, 220, 151, 254, 224, 251 },
+  { 180, 126, 157, 221, 152, 254, 225, 251 },
+  { 181, 126, 157, 221, 152, 254, 225, 251 },
+  { 183, 127, 158, 222, 153, 254, 226, 251 },
+  { 184, 127, 158, 223, 154, 254, 226, 251 },
+  { 185, 128, 159, 224, 155, 255, 227, 251 },
+  { 186, 128, 159, 224, 155, 255, 227, 251 },
+  { 187, 129, 160, 225, 156, 255, 228, 251 },
+  { 188, 130, 160, 225, 156, 255, 228, 251 },
+  { 189, 131, 160, 226, 157, 255, 228, 251 },
+  { 190, 131, 160, 226, 158, 255, 228, 251 },
+  { 191, 132, 161, 227, 159, 255, 229, 251 },
+  { 192, 132, 161, 227, 159, 255, 229, 251 },
+  { 193, 133, 162, 228, 160, 255, 230, 252 },
+  { 194, 133, 162, 229, 160, 255, 230, 252 },
+  { 195, 134, 163, 230, 161, 255, 231, 252 },
+  { 196, 134, 163, 230, 161, 255, 231, 252 },
+  { 197, 135, 163, 231, 162, 255, 231, 252 },
+  { 198, 135, 163, 231, 162, 255, 231, 252 },
+  { 199, 136, 164, 232, 163, 255, 232, 252 },
+  { 200, 136, 164, 232, 164, 255, 232, 252 },
+  { 201, 137, 165, 233, 165, 255, 233, 252 },
+  { 201, 137, 165, 233, 165, 255, 233, 252 },
+  { 202, 138, 166, 233, 166, 255, 233, 252 },
+  { 203, 138, 166, 233, 166, 255, 233, 252 },
+  { 204, 139, 166, 234, 167, 255, 234, 252 },
+  { 205, 139, 166, 234, 167, 255, 234, 252 },
+  { 206, 140, 167, 235, 168, 255, 235, 252 },
+  { 206, 140, 167, 235, 168, 255, 235, 252 },
+  { 207, 141, 168, 236, 169, 255, 235, 252 },
+  { 208, 141, 168, 236, 170, 255, 235, 252 },
+  { 209, 142, 169, 237, 171, 255, 236, 252 },
+  { 209, 143, 169, 237, 171, 255, 236, 252 },
+  { 210, 144, 169, 237, 172, 255, 236, 252 },
+  { 211, 144, 169, 237, 172, 255, 236, 252 },
+  { 212, 145, 170, 238, 173, 255, 237, 252 },
+  { 213, 145, 170, 238, 173, 255, 237, 252 },
+  { 214, 146, 171, 239, 174, 255, 237, 253 },
+  { 214, 146, 171, 239, 174, 255, 237, 253 },
+  { 215, 147, 172, 240, 175, 255, 238, 253 },
+  { 215, 147, 172, 240, 175, 255, 238, 253 },
+  { 216, 148, 173, 240, 176, 255, 238, 253 },
+  { 217, 148, 173, 240, 176, 255, 238, 253 },
+  { 218, 149, 173, 241, 177, 255, 239, 253 },
+  { 218, 149, 173, 241, 178, 255, 239, 253 },
+  { 219, 150, 174, 241, 179, 255, 239, 253 },
+  { 219, 151, 174, 241, 179, 255, 239, 253 },
+  { 220, 152, 175, 242, 180, 255, 240, 253 },
+  { 221, 152, 175, 242, 180, 255, 240, 253 },
+  { 222, 153, 176, 242, 181, 255, 240, 253 },
+  { 222, 153, 176, 242, 181, 255, 240, 253 },
+  { 223, 154, 177, 243, 182, 255, 240, 253 },
+  { 223, 154, 177, 243, 182, 255, 240, 253 },
+  { 224, 155, 178, 244, 183, 255, 241, 253 },
+  { 224, 155, 178, 244, 183, 255, 241, 253 },
+  { 225, 156, 178, 244, 184, 255, 241, 253 },
+  { 225, 157, 178, 244, 184, 255, 241, 253 },
+  { 226, 158, 179, 244, 185, 255, 242, 253 },
+  { 227, 158, 179, 244, 185, 255, 242, 253 },
+  { 228, 159, 180, 245, 186, 255, 242, 253 },
+  { 228, 159, 180, 245, 186, 255, 242, 253 },
+  { 229, 160, 181, 245, 187, 255, 242, 253 },
+  { 229, 160, 181, 245, 187, 255, 242, 253 },
+  { 230, 161, 182, 246, 188, 255, 243, 253 },
+  { 230, 162, 182, 246, 188, 255, 243, 253 },
+  { 231, 163, 183, 246, 189, 255, 243, 253 },
+  { 231, 163, 183, 246, 189, 255, 243, 253 },
+  { 232, 164, 184, 247, 190, 255, 243, 253 },
+  { 232, 164, 184, 247, 190, 255, 243, 253 },
+  { 233, 165, 185, 247, 191, 255, 244, 253 },
+  { 233, 165, 185, 247, 191, 255, 244, 253 },
+  { 234, 166, 185, 247, 192, 255, 244, 253 },
+  { 234, 167, 185, 247, 192, 255, 244, 253 },
+  { 235, 168, 186, 248, 193, 255, 244, 253 },
+  { 235, 168, 186, 248, 193, 255, 244, 253 },
+  { 236, 169, 187, 248, 194, 255, 244, 253 },
+  { 236, 169, 187, 248, 194, 255, 244, 253 },
+  { 236, 170, 188, 248, 195, 255, 245, 253 },
+  { 236, 170, 188, 248, 195, 255, 245, 253 },
+  { 237, 171, 189, 249, 196, 255, 245, 254 },
+  { 237, 172, 189, 249, 196, 255, 245, 254 },
+  { 238, 173, 190, 249, 197, 255, 245, 254 },
+  { 238, 173, 190, 249, 197, 255, 245, 254 },
+  { 239, 174, 191, 249, 198, 255, 245, 254 },
+  { 239, 174, 191, 249, 198, 255, 245, 254 },
+  { 240, 175, 192, 249, 199, 255, 246, 254 },
+  { 240, 176, 192, 249, 199, 255, 246, 254 },
+  { 240, 177, 193, 250, 200, 255, 246, 254 },
+  { 240, 177, 193, 250, 200, 255, 246, 254 },
+  { 241, 178, 194, 250, 201, 255, 246, 254 },
+  { 241, 178, 194, 250, 201, 255, 246, 254 },
+  { 242, 179, 195, 250, 202, 255, 246, 254 },
+  { 242, 180, 195, 250, 202, 255, 246, 254 },
+  { 242, 181, 196, 250, 203, 255, 247, 254 },
+  { 242, 181, 196, 250, 203, 255, 247, 254 },
+  { 243, 182, 197, 251, 204, 255, 247, 254 },
+  { 243, 183, 197, 251, 204, 255, 247, 254 },
+  { 244, 184, 198, 251, 205, 255, 247, 254 },
+  { 244, 184, 198, 251, 205, 255, 247, 254 },
+  { 244, 185, 199, 251, 206, 255, 247, 254 },
+  { 244, 185, 199, 251, 206, 255, 247, 254 },
+  { 245, 186, 200, 251, 207, 255, 247, 254 },
+  { 245, 187, 200, 251, 207, 255, 247, 254 },
+  { 246, 188, 201, 252, 207, 255, 248, 254 },
+  { 246, 188, 201, 252, 207, 255, 248, 254 },
+  { 246, 189, 202, 252, 208, 255, 248, 254 },
+  { 246, 190, 202, 252, 208, 255, 248, 254 },
+  { 247, 191, 203, 252, 209, 255, 248, 254 },
+  { 247, 191, 203, 252, 209, 255, 248, 254 },
+  { 247, 192, 204, 252, 210, 255, 248, 254 },
+  { 247, 193, 204, 252, 210, 255, 248, 254 },
+  { 248, 194, 205, 252, 211, 255, 248, 254 },
+  { 248, 194, 205, 252, 211, 255, 248, 254 },
+  { 248, 195, 206, 252, 212, 255, 249, 254 },
+  { 248, 196, 206, 252, 212, 255, 249, 254 },
+  { 249, 197, 207, 253, 213, 255, 249, 254 },
+  { 249, 197, 207, 253, 213, 255, 249, 254 },
+  { 249, 198, 208, 253, 214, 255, 249, 254 },
+  { 249, 199, 209, 253, 214, 255, 249, 254 },
+  { 250, 200, 210, 253, 215, 255, 249, 254 },
+  { 250, 200, 210, 253, 215, 255, 249, 254 },
+  { 250, 201, 211, 253, 215, 255, 249, 254 },
+  { 250, 202, 211, 253, 215, 255, 249, 254 },
+  { 250, 203, 212, 253, 216, 255, 249, 254 },
+  { 250, 203, 212, 253, 216, 255, 249, 254 },
+  { 251, 204, 213, 253, 217, 255, 250, 254 },
+  { 251, 205, 213, 253, 217, 255, 250, 254 },
+  { 251, 206, 214, 254, 218, 255, 250, 254 },
+  { 251, 206, 215, 254, 218, 255, 250, 254 },
+  { 252, 207, 216, 254, 219, 255, 250, 254 },
+  { 252, 208, 216, 254, 219, 255, 250, 254 },
+  { 252, 209, 217, 254, 220, 255, 250, 254 },
+  { 252, 210, 217, 254, 220, 255, 250, 254 },
+  { 252, 211, 218, 254, 221, 255, 250, 254 },
+  { 252, 212, 218, 254, 221, 255, 250, 254 },
+  { 253, 213, 219, 254, 222, 255, 250, 254 },
+  { 253, 213, 220, 254, 222, 255, 250, 254 },
+  { 253, 214, 221, 254, 223, 255, 250, 254 },
+  { 253, 215, 221, 254, 223, 255, 250, 254 },
+  { 253, 216, 222, 254, 224, 255, 251, 254 },
+  { 253, 217, 223, 254, 224, 255, 251, 254 },
+  { 253, 218, 224, 254, 225, 255, 251, 254 },
+  { 253, 219, 224, 254, 225, 255, 251, 254 },
+  { 254, 220, 225, 254, 225, 255, 251, 254 },
+  { 254, 221, 226, 254, 225, 255, 251, 254 },
+  { 254, 222, 227, 255, 226, 255, 251, 254 },
+  { 254, 223, 227, 255, 226, 255, 251, 254 },
+  { 254, 224, 228, 255, 227, 255, 251, 254 },
+  { 254, 225, 229, 255, 227, 255, 251, 254 },
+  { 254, 226, 230, 255, 228, 255, 251, 254 },
+  { 254, 227, 230, 255, 229, 255, 251, 254 },
+  { 255, 228, 231, 255, 230, 255, 251, 254 },
+  { 255, 229, 232, 255, 230, 255, 251, 254 },
+  { 255, 230, 233, 255, 231, 255, 252, 254 },
+  { 255, 231, 234, 255, 231, 255, 252, 254 },
+  { 255, 232, 235, 255, 232, 255, 252, 254 },
+  { 255, 233, 236, 255, 232, 255, 252, 254 },
+  { 255, 235, 237, 255, 233, 255, 252, 254 },
+  { 255, 236, 238, 255, 234, 255, 252, 254 },
+  { 255, 238, 240, 255, 235, 255, 252, 255 },
+  { 255, 239, 241, 255, 235, 255, 252, 254 },
+  { 255, 241, 243, 255, 236, 255, 252, 254 },
+  { 255, 243, 245, 255, 237, 255, 252, 254 },
+  { 255, 246, 247, 255, 239, 255, 253, 255 },
+};
+
+#if CONFIG_ANS
+// Model obtained from a 2-sided zero-centered distribution derived
+// from a Pareto distribution. The cdf of the distribution is:
+// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
+//
+// For a given beta and a given probability of the 1-node, the alpha
+// is first solved, and then the {alpha, beta} pair is used to generate
+// the probabilities for the rest of the nodes.
+//
+// beta = 8
+// Values for tokens ONE_TOKEN through CATEGORY6_TOKEN included here.
+// ZERO_TOKEN and EOB_TOKEN are coded as flags outside this coder.
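+// Each row below sums to 1 << 10 == 1024, as expected for 10-bit (AnsP10)
+// probabilities; e.g. row 0: 4 + 4 + 4 + 4 + 8 + 15 + 30 + 57 + 103 + 795.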
+const AnsP10 vp10_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2] = {
+  { 4, 4, 4, 4, 8, 15, 30, 57, 103, 795 },
+  { 8, 8, 8, 8, 15, 30, 57, 103, 168, 619 },
+  { 12, 12, 12, 12, 23, 43, 80, 138, 205, 487 },
+  { 16, 16, 15, 15, 30, 56, 101, 165, 225, 385 },
+  { 20, 20, 19, 19, 36, 68, 119, 186, 231, 306 },
+  { 24, 23, 23, 22, 43, 79, 135, 201, 230, 244 },
+  { 28, 27, 26, 26, 49, 89, 149, 211, 223, 196 },
+  { 32, 31, 30, 29, 55, 98, 160, 218, 212, 159 },
+  { 36, 35, 33, 32, 60, 107, 171, 221, 200, 129 },
+  { 40, 38, 37, 35, 66, 115, 179, 222, 187, 105 },
+  { 44, 42, 40, 38, 71, 122, 186, 221, 174, 86 },
+  { 48, 45, 43, 41, 76, 129, 192, 219, 160, 71 },
+  { 52, 49, 46, 44, 80, 136, 196, 215, 148, 58 },
+  { 56, 53, 49, 46, 85, 142, 200, 210, 135, 48 },
+  { 60, 56, 52, 49, 89, 147, 203, 204, 124, 40 },
+  { 64, 60, 55, 52, 93, 151, 205, 198, 113, 33 },
+  { 68, 63, 58, 54, 97, 156, 205, 192, 103, 28 },
+  { 72, 66, 61, 57, 100, 160, 206, 185, 94, 23 },
+  { 76, 70, 64, 59, 104, 163, 205, 178, 85, 20 },
+  { 80, 73, 67, 61, 107, 166, 205, 171, 77, 17 },
+  { 84, 76, 69, 63, 110, 169, 204, 164, 71, 14 },
+  { 88, 80, 72, 65, 113, 171, 202, 157, 64, 12 },
+  { 92, 83, 75, 67, 116, 173, 200, 150, 58, 10 },
+  { 96, 86, 77, 69, 118, 175, 198, 143, 53, 9 },
+  { 100, 89, 80, 71, 121, 176, 195, 137, 48, 7 },
+  { 104, 92, 82, 73, 123, 178, 192, 130, 44, 6 },
+  { 108, 96, 84, 75, 125, 178, 189, 124, 40, 5 },
+  { 112, 98, 87, 76, 127, 179, 186, 118, 36, 5 },
+  { 116, 101, 89, 78, 129, 179, 183, 112, 33, 4 },
+  { 120, 104, 91, 80, 131, 180, 179, 106, 30, 3 },
+  { 124, 107, 93, 81, 132, 180, 176, 101, 27, 3 },
+  { 128, 110, 95, 82, 134, 179, 172, 96, 25, 3 },
+  { 132, 113, 97, 84, 135, 179, 168, 91, 23, 2 },
+  { 136, 116, 99, 85, 136, 179, 164, 86, 21, 2 },
+  { 140, 119, 101, 86, 137, 178, 160, 82, 19, 2 },
+  { 144, 122, 103, 88, 138, 177, 157, 77, 17, 1 },
+  { 148, 124, 105, 89, 139, 176, 153, 73, 16, 1 },
+  { 152, 127, 107, 90, 140, 175, 149, 69, 14, 1 },
+  { 156, 130, 108, 91, 141, 173, 145, 66, 13, 1 },
+  { 160, 133, 110, 92, 141, 172, 141, 62, 12, 1 },
+  { 164, 135, 111, 93, 142, 171, 137, 59, 11, 1 },
+  { 168, 138, 113, 94, 142, 169, 133, 56, 10, 1 },
+  { 172, 140, 115, 94, 142, 168, 130, 53, 9, 1 },
+  { 176, 143, 116, 95, 143, 166, 126, 50, 8, 1 },
+  { 180, 145, 118, 96, 143, 164, 122, 47, 8, 1 },
+  { 184, 147, 119, 96, 143, 163, 119, 45, 7, 1 },
+  { 188, 150, 120, 97, 143, 161, 116, 42, 6, 1 },
+  { 192, 152, 121, 98, 143, 159, 112, 40, 6, 1 },
+  { 196, 155, 123, 98, 142, 157, 109, 38, 5, 1 },
+  { 200, 157, 124, 99, 142, 155, 105, 36, 5, 1 },
+  { 204, 159, 125, 99, 142, 153, 102, 34, 5, 1 },
+  { 208, 161, 126, 100, 142, 151, 99, 32, 4, 1 },
+  { 212, 164, 127, 100, 141, 149, 96, 30, 4, 1 },
+  { 216, 166, 129, 100, 141, 147, 93, 28, 3, 1 },
+  { 220, 168, 130, 101, 140, 144, 90, 27, 3, 1 },
+  { 224, 170, 131, 101, 140, 142, 87, 25, 3, 1 },
+  { 228, 172, 132, 101, 139, 140, 84, 24, 3, 1 },
+  { 232, 174, 132, 101, 139, 138, 81, 23, 3, 1 },
+  { 236, 176, 133, 101, 138, 136, 79, 22, 2, 1 },
+  { 240, 178, 134, 102, 137, 134, 76, 20, 2, 1 },
+  { 244, 180, 135, 102, 136, 131, 74, 19, 2, 1 },
+  { 248, 182, 135, 102, 136, 129, 71, 18, 2, 1 },
+  { 252, 184, 136, 101, 135, 127, 69, 17, 2, 1 },
+  { 256, 186, 137, 102, 134, 124, 66, 16, 2, 1 },
+  { 260, 188, 138, 102, 133, 122, 64, 15, 1, 1 },
+  { 264, 190, 138, 101, 132, 120, 62, 15, 1, 1 },
+  { 268, 191, 139, 101, 131, 118, 60, 14, 1, 1 },
+  { 272, 193, 139, 101, 130, 116, 58, 13, 1, 1 },
+  { 276, 195, 139, 101, 129, 114, 56, 12, 1, 1 },
+  { 280, 196, 140, 101, 128, 111, 54, 12, 1, 1 },
+  { 284, 198, 140, 101, 127, 109, 52, 11, 1, 1 },
+  { 288, 200, 141, 100, 126, 107, 50, 10, 1, 1 },
+  { 292, 201, 141, 100, 125, 105, 48, 10, 1, 1 },
+  { 296, 203, 141, 100, 123, 103, 47, 9, 1, 1 },
+  { 300, 204, 142, 99, 122, 101, 45, 9, 1, 1 },
+  { 304, 206, 142, 99, 121, 99, 43, 8, 1, 1 },
+  { 308, 207, 142, 99, 119, 97, 42, 8, 1, 1 },
+  { 312, 209, 142, 99, 118, 95, 40, 7, 1, 1 },
+  { 316, 210, 142, 98, 117, 93, 39, 7, 1, 1 },
+  { 320, 211, 142, 98, 116, 91, 37, 7, 1, 1 },
+  { 324, 213, 142, 97, 115, 89, 36, 6, 1, 1 },
+  { 328, 214, 142, 97, 113, 87, 35, 6, 1, 1 },
+  { 332, 215, 143, 96, 112, 85, 33, 6, 1, 1 },
+  { 336, 216, 143, 96, 111, 83, 32, 5, 1, 1 },
+  { 340, 218, 143, 95, 109, 81, 31, 5, 1, 1 },
+  { 344, 219, 142, 95, 108, 79, 30, 5, 1, 1 },
+  { 348, 220, 142, 94, 107, 78, 29, 4, 1, 1 },
+  { 352, 221, 142, 94, 105, 76, 28, 4, 1, 1 },
+  { 356, 222, 142, 93, 104, 74, 27, 4, 1, 1 },
+  { 360, 223, 142, 92, 103, 72, 26, 4, 1, 1 },
+  { 364, 224, 142, 92, 101, 70, 25, 4, 1, 1 },
+  { 368, 225, 142, 91, 100, 69, 24, 3, 1, 1 },
+  { 372, 226, 141, 91, 99, 67, 23, 3, 1, 1 },
+  { 376, 227, 141, 90, 97, 66, 22, 3, 1, 1 },
+  { 380, 228, 141, 89, 96, 64, 21, 3, 1, 1 },
+  { 384, 229, 140, 89, 95, 62, 20, 3, 1, 1 },
+  { 388, 229, 140, 88, 93, 61, 20, 3, 1, 1 },
+  { 392, 230, 140, 87, 92, 60, 19, 2, 1, 1 },
+  { 396, 231, 140, 86, 91, 58, 18, 2, 1, 1 },
+  { 400, 232, 139, 86, 89, 57, 17, 2, 1, 1 },
+  { 404, 232, 139, 85, 88, 55, 17, 2, 1, 1 },
+  { 408, 233, 138, 84, 87, 54, 16, 2, 1, 1 },
+  { 412, 234, 138, 84, 85, 52, 15, 2, 1, 1 },
+  { 416, 234, 137, 83, 84, 51, 15, 2, 1, 1 },
+  { 420, 235, 137, 82, 82, 50, 14, 2, 1, 1 },
+  { 424, 236, 136, 81, 81, 48, 14, 2, 1, 1 },
+  { 428, 236, 136, 81, 80, 47, 13, 1, 1, 1 },
+  { 432, 236, 135, 80, 79, 46, 13, 1, 1, 1 },
+  { 436, 237, 135, 79, 77, 45, 12, 1, 1, 1 },
+  { 440, 238, 134, 78, 76, 43, 12, 1, 1, 1 },
+  { 444, 238, 134, 77, 75, 42, 11, 1, 1, 1 },
+  { 448, 238, 133, 77, 73, 41, 11, 1, 1, 1 },
+  { 452, 239, 132, 76, 72, 40, 10, 1, 1, 1 },
+  { 456, 239, 131, 75, 71, 39, 10, 1, 1, 1 },
+  { 460, 239, 131, 74, 70, 38, 9, 1, 1, 1 },
+  { 464, 240, 130, 73, 68, 37, 9, 1, 1, 1 },
+  { 468, 240, 129, 72, 67, 36, 9, 1, 1, 1 },
+  { 472, 240, 128, 72, 66, 35, 8, 1, 1, 1 },
+  { 476, 240, 127, 71, 65, 34, 8, 1, 1, 1 },
+  { 480, 240, 127, 70, 63, 33, 8, 1, 1, 1 },
+  { 484, 241, 126, 69, 62, 32, 7, 1, 1, 1 },
+  { 488, 241, 125, 68, 61, 31, 7, 1, 1, 1 },
+  { 492, 241, 124, 67, 60, 30, 7, 1, 1, 1 },
+  { 496, 241, 124, 66, 59, 29, 6, 1, 1, 1 },
+  { 500, 240, 123, 66, 58, 28, 6, 1, 1, 1 },
+  { 504, 240, 122, 65, 57, 27, 6, 1, 1, 1 },
+  { 508, 240, 121, 64, 55, 27, 6, 1, 1, 1 },
+  { 512, 241, 120, 63, 54, 26, 5, 1, 1, 1 },
+  { 516, 241, 119, 62, 53, 25, 5, 1, 1, 1 },
+  { 520, 240, 118, 62, 52, 24, 5, 1, 1, 1 },
+  { 524, 240, 117, 60, 51, 24, 5, 1, 1, 1 },
+  { 528, 239, 116, 60, 50, 23, 5, 1, 1, 1 },
+  { 532, 239, 116, 59, 49, 22, 4, 1, 1, 1 },
+  { 536, 239, 115, 58, 48, 21, 4, 1, 1, 1 },
+  { 540, 239, 113, 57, 47, 21, 4, 1, 1, 1 },
+  { 544, 238, 113, 56, 46, 20, 4, 1, 1, 1 },
+  { 548, 238, 112, 55, 45, 19, 4, 1, 1, 1 },
+  { 552, 238, 110, 55, 44, 19, 3, 1, 1, 1 },
+  { 556, 237, 110, 54, 43, 18, 3, 1, 1, 1 },
+  { 560, 237, 108, 53, 42, 18, 3, 1, 1, 1 },
+  { 564, 236, 108, 52, 41, 17, 3, 1, 1, 1 },
+  { 568, 236, 106, 51, 40, 17, 3, 1, 1, 1 },
+  { 572, 235, 105, 51, 39, 16, 3, 1, 1, 1 },
+  { 576, 235, 104, 50, 38, 15, 3, 1, 1, 1 },
+  { 580, 234, 103, 49, 37, 15, 3, 1, 1, 1 },
+  { 584, 234, 102, 48, 37, 14, 2, 1, 1, 1 },
+  { 588, 233, 101, 47, 36, 14, 2, 1, 1, 1 },
+  { 592, 233, 100, 46, 35, 13, 2, 1, 1, 1 },
+  { 596, 231, 99, 46, 34, 13, 2, 1, 1, 1 },
+  { 600, 230, 98, 45, 33, 13, 2, 1, 1, 1 },
+  { 604, 230, 97, 44, 32, 12, 2, 1, 1, 1 },
+  { 608, 229, 96, 43, 31, 12, 2, 1, 1, 1 },
+  { 612, 228, 95, 42, 31, 11, 2, 1, 1, 1 },
+  { 616, 227, 93, 42, 30, 11, 2, 1, 1, 1 },
+  { 620, 227, 92, 41, 29, 10, 2, 1, 1, 1 },
+  { 624, 226, 92, 40, 28, 10, 1, 1, 1, 1 },
+  { 628, 225, 90, 39, 28, 10, 1, 1, 1, 1 },
+  { 632, 224, 89, 39, 27, 9, 1, 1, 1, 1 },
+  { 636, 223, 88, 38, 26, 9, 1, 1, 1, 1 },
+  { 640, 222, 87, 37, 25, 9, 1, 1, 1, 1 },
+  { 644, 221, 86, 36, 25, 8, 1, 1, 1, 1 },
+  { 648, 220, 84, 36, 24, 8, 1, 1, 1, 1 },
+  { 652, 219, 83, 35, 23, 8, 1, 1, 1, 1 },
+  { 656, 218, 82, 34, 23, 7, 1, 1, 1, 1 },
+  { 660, 217, 81, 33, 22, 7, 1, 1, 1, 1 },
+  { 664, 215, 80, 33, 21, 7, 1, 1, 1, 1 },
+  { 668, 214, 78, 32, 21, 7, 1, 1, 1, 1 },
+  { 672, 213, 78, 31, 20, 6, 1, 1, 1, 1 },
+  { 676, 211, 76, 31, 20, 6, 1, 1, 1, 1 },
+  { 680, 210, 75, 30, 19, 6, 1, 1, 1, 1 },
+  { 684, 209, 74, 29, 18, 6, 1, 1, 1, 1 },
+  { 688, 208, 73, 28, 18, 5, 1, 1, 1, 1 },
+  { 692, 206, 72, 28, 17, 5, 1, 1, 1, 1 },
+  { 696, 205, 70, 27, 17, 5, 1, 1, 1, 1 },
+  { 700, 203, 69, 27, 16, 5, 1, 1, 1, 1 },
+  { 704, 201, 68, 26, 16, 5, 1, 1, 1, 1 },
+  { 708, 201, 67, 25, 15, 4, 1, 1, 1, 1 },
+  { 712, 198, 66, 25, 15, 4, 1, 1, 1, 1 },
+  { 716, 197, 65, 24, 14, 4, 1, 1, 1, 1 },
+  { 720, 196, 63, 23, 14, 4, 1, 1, 1, 1 },
+  { 724, 194, 62, 23, 13, 4, 1, 1, 1, 1 },
+  { 728, 193, 61, 22, 13, 3, 1, 1, 1, 1 },
+  { 732, 191, 60, 22, 12, 3, 1, 1, 1, 1 },
+  { 736, 189, 59, 21, 12, 3, 1, 1, 1, 1 },
+  { 740, 188, 58, 20, 11, 3, 1, 1, 1, 1 },
+  { 744, 186, 56, 20, 11, 3, 1, 1, 1, 1 },
+  { 748, 184, 55, 19, 11, 3, 1, 1, 1, 1 },
+  { 752, 182, 54, 19, 10, 3, 1, 1, 1, 1 },
+  { 756, 181, 53, 18, 10, 2, 1, 1, 1, 1 },
+  { 760, 179, 52, 18, 9, 2, 1, 1, 1, 1 },
+  { 764, 177, 51, 17, 9, 2, 1, 1, 1, 1 },
+  { 768, 174, 50, 17, 9, 2, 1, 1, 1, 1 },
+  { 772, 173, 49, 16, 8, 2, 1, 1, 1, 1 },
+  { 776, 171, 47, 16, 8, 2, 1, 1, 1, 1 },
+  { 780, 169, 46, 15, 8, 2, 1, 1, 1, 1 },
+  { 784, 167, 45, 15, 7, 2, 1, 1, 1, 1 },
+  { 788, 165, 44, 14, 7, 2, 1, 1, 1, 1 },
+  { 792, 162, 43, 14, 7, 2, 1, 1, 1, 1 },
+  { 796, 161, 42, 13, 7, 1, 1, 1, 1, 1 },
+  { 800, 159, 41, 13, 6, 1, 1, 1, 1, 1 },
+  { 804, 157, 40, 12, 6, 1, 1, 1, 1, 1 },
+  { 808, 154, 39, 12, 6, 1, 1, 1, 1, 1 },
+  { 812, 153, 38, 11, 5, 1, 1, 1, 1, 1 },
+  { 816, 150, 37, 11, 5, 1, 1, 1, 1, 1 },
+  { 820, 148, 36, 10, 5, 1, 1, 1, 1, 1 },
+  { 824, 145, 35, 10, 5, 1, 1, 1, 1, 1 },
+  { 828, 143, 34, 10, 4, 1, 1, 1, 1, 1 },
+  { 832, 141, 33, 9, 4, 1, 1, 1, 1, 1 },
+  { 836, 138, 32, 9, 4, 1, 1, 1, 1, 1 },
+  { 840, 136, 30, 9, 4, 1, 1, 1, 1, 1 },
+  { 844, 133, 30, 8, 4, 1, 1, 1, 1, 1 },
+  { 848, 131, 29, 8, 3, 1, 1, 1, 1, 1 },
+  { 852, 129, 28, 7, 3, 1, 1, 1, 1, 1 },
+  { 856, 126, 27, 7, 3, 1, 1, 1, 1, 1 },
+  { 860, 123, 26, 7, 3, 1, 1, 1, 1, 1 },
+  { 864, 121, 25, 6, 3, 1, 1, 1, 1, 1 },
+  { 868, 118, 24, 6, 3, 1, 1, 1, 1, 1 },
+  { 872, 116, 23, 6, 2, 1, 1, 1, 1, 1 },
+  { 876, 113, 22, 6, 2, 1, 1, 1, 1, 1 },
+  { 880, 111, 21, 5, 2, 1, 1, 1, 1, 1 },
+  { 884, 108, 20, 5, 2, 1, 1, 1, 1, 1 },
+  { 888, 105, 19, 5, 2, 1, 1, 1, 1, 1 },
+  { 892, 102, 19, 4, 2, 1, 1, 1, 1, 1 },
+  { 896, 99, 18, 4, 2, 1, 1, 1, 1, 1 },
+  { 900, 97, 17, 4, 1, 1, 1, 1, 1, 1 },
+  { 904, 94, 16, 4, 1, 1, 1, 1, 1, 1 },
+  { 908, 92, 15, 3, 1, 1, 1, 1, 1, 1 },
+  { 912, 89, 14, 3, 1, 1, 1, 1, 1, 1 },
+  { 916, 85, 14, 3, 1, 1, 1, 1, 1, 1 },
+  { 920, 82, 13, 3, 1, 1, 1, 1, 1, 1 },
+  { 924, 79, 12, 3, 1, 1, 1, 1, 1, 1 },
+  { 928, 77, 11, 2, 1, 1, 1, 1, 1, 1 },
+  { 932, 73, 11, 2, 1, 1, 1, 1, 1, 1 },
+  { 936, 70, 10, 2, 1, 1, 1, 1, 1, 1 },
+  { 940, 67, 9, 2, 1, 1, 1, 1, 1, 1 },
+  { 944, 64, 8, 2, 1, 1, 1, 1, 1, 1 },
+  { 948, 60, 8, 2, 1, 1, 1, 1, 1, 1 },
+  { 952, 58, 7, 1, 1, 1, 1, 1, 1, 1 },
+  { 956, 54, 7, 1, 1, 1, 1, 1, 1, 1 },
+  { 960, 51, 6, 1, 1, 1, 1, 1, 1, 1 },
+  { 964, 48, 5, 1, 1, 1, 1, 1, 1, 1 },
+  { 968, 44, 5, 1, 1, 1, 1, 1, 1, 1 },
+  { 972, 41, 4, 1, 1, 1, 1, 1, 1, 1 },
+  { 976, 37, 4, 1, 1, 1, 1, 1, 1, 1 },
+  { 980, 34, 3, 1, 1, 1, 1, 1, 1, 1 },
+  { 984, 30, 3, 1, 1, 1, 1, 1, 1, 1 },
+  { 988, 27, 2, 1, 1, 1, 1, 1, 1, 1 },
+  { 992, 23, 2, 1, 1, 1, 1, 1, 1, 1 },
+  { 996, 19, 2, 1, 1, 1, 1, 1, 1, 1 },
+  { 1000, 16, 1, 1, 1, 1, 1, 1, 1, 1 },
+  { 1004, 12, 1, 1, 1, 1, 1, 1, 1, 1 },
+  { 1008, 8, 1, 1, 1, 1, 1, 1, 1, 1 },
+  { 1012, 4, 1, 1, 1, 1, 1, 1, 1, 1 },
+  { 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+  { 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+};
+#endif  // CONFIG_ANS
+
+/* clang-format off */
+#if CONFIG_ENTROPY
+const vp10_coeff_probs_model
+default_qctx_coef_probs[QCTX_BINS][TX_SIZES][PLANE_TYPES] = {
+    {  // Q_Index 0
+        {  // TX_SIZE 0
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        {182,  34, 137}, { 79,  39, 103}, { 10,  28,  51},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 45,  88, 147}, { 46,  80, 140}, { 25,  69, 119},
+                        { 12,  57,  96}, {  4,  41,  65}, {  1,  20,  31},
+                    },
+                    {  // band 2
+                        { 58, 124, 190}, { 39, 106, 178}, { 16,  86, 147},
+                        {  7,  69, 114}, {  3,  50,  80}, {  1,  25,  42},
+                    },
+                    {  // band 3
+                        { 90, 138, 215}, { 54, 116, 198}, { 18,  86, 155},
+                        {  5,  62, 112}, {  1,  38,  68}, {  1,  17,  30},
+                    },
+                    {  // band 4
+                        {126, 149, 231}, { 82, 114, 211}, { 21,  80, 157},
+                        {  6,  56, 105}, {  1,  36,  64}, {  1,  17,  31},
+                    },
+                    {  // band 5
+                        {171,  56, 236}, {140,  54, 219}, { 57,  45, 167},
+                        { 26,  36, 113}, { 11,  29,  72}, {  3,  18,  39},
+                    },
+                },
+                {  // Intra
+                    {  // band 0
+                        {153, 122, 186}, {106, 109, 171}, { 36,  84, 128},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 27, 151, 201}, { 34, 131, 199}, { 23, 102, 161},
+                        { 10,  80, 120}, {  4,  52,  78}, {  1,  24,  37},
+                    },
+                    {  // band 2
+                        { 43, 158, 213}, { 35, 133, 203}, {  8,  92, 151},
+                        {  2,  64, 106}, {  1,  36,  60}, {  1,  13,  24},
+                    },
+                    {  // band 3
+                        { 68, 167, 223}, { 36, 135, 211}, {  9,  94, 157},
+                        {  2,  67, 112}, {  1,  40,  68}, {  1,  17,  31},
+                    },
+                    {  // band 4
+                        {131, 146, 237}, { 72, 119, 223}, { 17,  82, 164},
+                        {  4,  55, 107}, {  1,  34,  63}, {  1,  16,  29},
+                    },
+                    {  // band 5
+                        {184,  68, 244}, {153,  59, 232}, { 68,  51, 179},
+                        { 31,  40, 123}, { 13,  29,  77}, {  4,  17,  37},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Inter
+                    {  // band 0
+                        {203,  41, 203}, {127,  56, 174}, { 49,  56, 127},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {110, 121, 217}, {119, 113, 213}, { 64,  95, 185},
+                        { 30,  72, 144}, {  8,  42,  76}, {  2,  17,  25},
+                    },
+                    {  // band 2
+                        {127, 159, 229}, {115, 134, 223}, { 36, 100, 189},
+                        { 11,  75, 142}, {  3,  48,  83}, {  1,  19,  33},
+                    },
+                    {  // band 3
+                        {150, 172, 241}, { 90, 133, 231}, { 28, 102, 192},
+                        {  7,  81, 147}, {  1,  53,  91}, {  1,  25,  42},
+                    },
+                    {  // band 4
+                        {184, 144, 248}, {114, 117, 237}, { 37,  89, 192},
+                        { 10,  63, 130}, {  4,  42,  76}, {  1,  19,  38},
+                    },
+                    {  // band 5
+                        {207,  79, 250}, {179,  74, 241}, { 83,  67, 199},
+                        { 38,  51, 142}, { 17,  37,  97}, { 10,  14,  55},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {220,  82, 232}, {150,  93, 214}, { 66,  95, 177},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {116, 160, 227}, {136, 141, 227}, { 67, 114, 190},
+                        { 40,  94, 148}, { 21,  70, 107}, { 10,  43,  63},
+                    },
+                    {  // band 2
+                        {124, 173, 235}, {105, 147, 226}, { 27, 107, 184},
+                        { 10,  80, 142}, {  3,  50,  86}, {  1,  16,  32},
+                    },
+                    {  // band 3
+                        {149, 179, 243}, { 89, 147, 234}, { 29, 112, 193},
+                        {  9,  94, 157}, {  1,  64, 111}, {  1,  25,  43},
+                    },
+                    {  // band 4
+                        {187, 153, 248}, {127, 130, 241}, { 52,  99, 202},
+                        { 20,  79, 152}, {  4,  50,  93}, {  1,  19,  32},
+                    },
+                    {  // band 5
+                        {215,  82, 251}, {195,  80, 246}, { 93,  70, 204},
+                        { 39,  54, 147}, { 14,  33,  88}, {  6,  14,  39},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 1
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        {116,  43, 131}, { 39,  41,  94}, {  4,  28,  47},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 28, 101, 141}, { 27,  95, 140}, { 18,  80, 121},
+                        { 10,  61,  95}, {  4,  39,  60}, {  1,  19,  26},
+                    },
+                    {  // band 2
+                        { 29, 150, 183}, { 19, 127, 175}, {  8,  98, 147},
+                        {  3,  76, 115}, {  1,  55,  84}, {  1,  29,  43},
+                    },
+                    {  // band 3
+                        { 26, 168, 202}, { 12, 138, 188}, {  2,  98, 149},
+                        {  1,  69, 110}, {  1,  40,  65}, {  1,  17,  25},
+                    },
+                    {  // band 4
+                        { 33, 188, 225}, { 12, 155, 207}, {  2, 101, 155},
+                        {  1,  65, 106}, {  1,  36,  60}, {  1,  18,  26},
+                    },
+                    {  // band 5
+                        { 79, 205, 242}, { 30, 168, 224}, {  5, 106, 164},
+                        {  1,  68, 110}, {  1,  39,  65}, {  1,  18,  28},
+                    },
+                },
+                {  // Intra
+                    {  // band 0
+                        { 96,  80, 201}, { 51,  88, 168}, { 14,  78, 116},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {  6, 167, 216}, { 32, 152, 211}, { 24, 121, 182},
+                        { 13,  98, 149}, { 12,  76, 108}, {  8,  48,  62},
+                    },
+                    {  // band 2
+                        { 17, 176, 225}, { 13, 147, 209}, {  3,  96, 155},
+                        {  1,  65, 108}, {  2,  43,  63}, {  2,  23,  25},
+                    },
+                    {  // band 3
+                        { 18, 183, 232}, { 10, 153, 214}, {  1,  96, 154},
+                        {  1,  63, 105}, {  1,  39,  59}, {  1,  21,  24},
+                    },
+                    {  // band 4
+                        { 23, 191, 239}, {  8, 159, 221}, {  1,  97, 158},
+                        {  1,  61, 105}, {  1,  37,  60}, {  1,  20,  26},
+                    },
+                    {  // band 5
+                        { 70, 201, 243}, { 29, 163, 228}, {  4, 102, 169},
+                        {  1,  67, 114}, {  1,  39,  66}, {  1,  17,  29},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {181,  38, 192}, { 95,  47, 151}, { 29,  49, 102},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 72, 131, 202}, { 93, 120, 205}, { 50, 103, 179},
+                        { 24,  79, 143}, { 11,  47,  78}, {  7,  19,  25},
+                    },
+                    {  // band 2
+                        { 84, 176, 221}, { 56, 144, 214}, { 21, 108, 182},
+                        {  8,  83, 139}, {  3,  55,  90}, {  2,  27,  41},
+                    },
+                    {  // band 3
+                        { 84, 195, 234}, { 42, 156, 222}, { 10, 109, 180},
+                        {  4,  77, 133}, {  1,  48,  80}, {  1,  23,  35},
+                    },
+                    {  // band 4
+                        { 89, 210, 238}, { 35, 165, 221}, {  6, 106, 172},
+                        {  2,  70, 123}, {  1,  44,  74}, {  1,  21,  30},
+                    },
+                    {  // band 5
+                        {114, 221, 247}, { 49, 170, 234}, {  7, 113, 184},
+                        {  2,  77, 132}, {  1,  48,  79}, {  1,  25,  33},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {192,  66, 237}, {113,  84, 211}, { 35,  84, 154},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 81, 180, 234}, {127, 165, 229}, { 58, 137, 204},
+                        { 41, 114, 174}, { 44,  94, 136}, { 29,  66,  86},
+                    },
+                    {  // band 2
+                        { 82, 193, 240}, { 39, 162, 223}, {  8, 113, 179},
+                        {  3,  83, 136}, {  6,  62,  84}, {  5,  45,  45},
+                    },
+                    {  // band 3
+                        { 78, 203, 242}, { 31, 170, 227}, {  4, 115, 181},
+                        {  1,  82, 135}, {  2,  59,  82}, {  1,  45,  47},
+                    },
+                    {  // band 4
+                        { 76, 210, 239}, { 25, 170, 213}, {  2,  99, 152},
+                        {  1,  69, 115}, {  1,  49,  80}, {  1,  47,  57},
+                    },
+                    {  // band 5
+                        {103, 217, 250}, { 42, 180, 237}, {  3, 124, 191},
+                        {  1,  90, 150}, {  1,  69, 116}, {  1,  52,  46},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 2
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        { 58,  38,  99}, {  9,  26,  51}, {  1,  14,  22},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 14,  78, 109}, { 16,  73, 105}, { 11,  62,  92},
+                        {  6,  47,  72}, {  2,  29,  45}, {  1,  12,  18},
+                    },
+                    {  // band 2
+                        { 17, 131, 148}, { 11, 112, 140}, {  5,  87, 118},
+                        {  2,  63,  90}, {  1,  42,  63}, {  1,  19,  31},
+                    },
+                    {  // band 3
+                        { 12, 151, 168}, {  6, 116, 152}, {  1,  76, 115},
+                        {  1,  50,  81}, {  1,  32,  52}, {  1,  14,  23},
+                    },
+                    {  // band 4
+                        { 10, 174, 191}, {  3, 130, 172}, {  1,  80, 126},
+                        {  1,  53,  88}, {  1,  32,  55}, {  1,  14,  24},
+                    },
+                    {  // band 5
+                        { 19, 219, 237}, {  3, 168, 211}, {  1,  90, 142},
+                        {  1,  53,  91}, {  1,  29,  51}, {  1,  12,  21},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        { 21,  46, 184}, { 10,  53, 130}, {  2,  49,  78},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {  3, 169, 198}, { 37, 165, 196}, { 26, 134, 176},
+                        { 11, 108, 149}, {  5,  81, 112}, {  3,  47,  64},
+                    },
+                    {  // band 2
+                        { 11, 183, 215}, {  8, 142, 192}, {  2,  91, 141},
+                        {  1,  62, 100}, {  1,  38,  62}, {  1,  17,  28},
+                    },
+                    {  // band 3
+                        { 12, 190, 223}, {  6, 149, 199}, {  1,  88, 139},
+                        {  1,  56,  93}, {  1,  31,  54}, {  1,  13,  21},
+                    },
+                    {  // band 4
+                        { 11, 197, 230}, {  3, 154, 204}, {  1,  83, 134},
+                        {  1,  50,  86}, {  1,  28,  49}, {  1,  12,  21},
+                    },
+                    {  // band 5
+                        { 17, 211, 240}, {  2, 167, 217}, {  1,  88, 143},
+                        {  1,  53,  91}, {  1,  30,  53}, {  1,  14,  24},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {151,  30, 151}, { 50,  36, 105}, {  8,  34,  66},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 39, 111, 160}, { 62, 111, 165}, { 37,  99, 147},
+                        { 15,  77, 118}, {  3,  47,  73}, {  1,  17,  27},
+                    },
+                    {  // band 2
+                        { 48, 170, 190}, { 32, 135, 180}, { 11, 100, 149},
+                        {  4,  76, 116}, {  1,  51,  80}, {  1,  22,  36},
+                    },
+                    {  // band 3
+                        { 39, 191, 208}, { 18, 141, 191}, {  3,  96, 150},
+                        {  1,  66, 110}, {  1,  41,  69}, {  1,  17,  28},
+                    },
+                    {  // band 4
+                        { 32, 209, 219}, {  8, 152, 201}, {  1,  96, 153},
+                        {  1,  63, 106}, {  1,  38,  66}, {  1,  17,  29},
+                    },
+                    {  // band 5
+                        { 33, 230, 237}, {  5, 173, 214}, {  1, 100, 155},
+                        {  1,  62, 105}, {  1,  38,  66}, {  1,  18,  32},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {149,  38, 231}, { 59,  51, 186}, { 12,  54, 117},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 53, 179, 226}, {126, 176, 223}, { 58, 147, 202},
+                        { 28, 118, 174}, { 15,  94, 138}, { 14,  63,  87},
+                    },
+                    {  // band 2
+                        { 58, 196, 232}, { 26, 158, 213}, {  5, 106, 166},
+                        {  1,  75, 124}, {  1,  46,  79}, {  1,  23,  39},
+                    },
+                    {  // band 3
+                        { 46, 203, 235}, { 17, 162, 213}, {  2, 104, 165},
+                        {  1,  72, 120}, {  1,  44,  74}, {  1,  22,  33},
+                    },
+                    {  // band 4
+                        { 37, 213, 238}, {  8, 167, 216}, {  1, 104, 168},
+                        {  1,  68, 119}, {  1,  40,  67}, {  1,  17,  29},
+                    },
+                    {  // band 5
+                        { 30, 228, 239}, {  4, 181, 213}, {  1, 103, 153},
+                        {  1,  65, 110}, {  1,  43,  79}, {  1,  27,  56},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 3
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        { 76,  25,  53}, {  9,  18,  32}, {  1,  12,  18},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 29,  55,  91}, { 19,  58,  95}, { 15,  57,  89},
+                        { 12,  49,  77}, {  3,  29,  44}, {  1,   8,  12},
+                    },
+                    {  // band 2
+                        { 32, 160, 148}, { 33, 143, 146}, { 19, 122, 132},
+                        {  6,  90, 102}, {  1,  58,  70}, {  1,  17,  24},
+                    },
+                    {  // band 3
+                        { 16, 181, 181}, {  6, 142, 165}, {  1,  90, 120},
+                        {  1,  50,  71}, {  1,  25,  38}, {  1,   9,  14},
+                    },
+                    {  // band 4
+                        { 13, 203, 203}, {  3, 154, 176}, {  1,  80, 108},
+                        {  1,  41,  61}, {  1,  24,  37}, {  1,  11,  17},
+                    },
+                    {  // band 5
+                        {  6, 234, 240}, {  1, 178, 204}, {  1,  80, 119},
+                        {  1,  45,  71}, {  1,  26,  42}, {  1,  12,  19},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        { 78,  20, 135}, { 25,  18, 101}, {  5,  19,  57},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {  7, 144, 183}, {117, 151, 195}, {109, 151, 187},
+                        { 39, 130, 168}, { 11, 100, 125}, {  4,  59,  64},
+                    },
+                    {  // band 2
+                        { 20, 184, 212}, { 12, 148, 191}, {  2,  98, 141},
+                        {  1,  65, 100}, {  1,  39,  61}, {  1,  14,  22},
+                    },
+                    {  // band 3
+                        { 15, 194, 222}, {  6, 153, 198}, {  1,  92, 138},
+                        {  1,  58,  91}, {  1,  32,  52}, {  1,  12,  18},
+                    },
+                    {  // band 4
+                        { 14, 206, 232}, {  3, 162, 206}, {  1,  89, 134},
+                        {  1,  52,  83}, {  1,  28,  46}, {  1,  11,  17},
+                    },
+                    {  // band 5
+                        {  6, 225, 241}, {  1, 175, 210}, {  1,  81, 125},
+                        {  1,  48,  78}, {  1,  28,  46}, {  1,  13,  21},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {124,  23,  93}, { 31,  24,  63}, {  6,  24,  46},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 23,  86, 126}, { 45,  90, 145}, { 31,  91, 133},
+                        { 19,  80, 114}, {  7,  53,  72}, {  1,  20,  27},
+                    },
+                    {  // band 2
+                        { 51, 186, 189}, { 48, 159, 182}, { 33, 128, 156},
+                        { 15,  92, 124}, {  2,  62,  83}, {  1,  29,  43},
+                    },
+                    {  // band 3
+                        { 36, 198, 211}, { 15, 156, 187}, {  3,  97, 137},
+                        {  1,  61,  93}, {  1,  35,  57}, {  1,  15,  23},
+                    },
+                    {  // band 4
+                        { 34, 219, 223}, {  9, 162, 193}, {  1,  91, 136},
+                        {  1,  58,  92}, {  1,  35,  54}, {  1,  14,  23},
+                    },
+                    {  // band 5
+                        { 19, 243, 243}, {  3, 191, 208}, {  1,  91, 137},
+                        {  1,  56,  90}, {  1,  34,  55}, {  1,  16,  24},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {119,  20, 197}, { 19,  29, 156}, {  3,  30, 107},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 24, 192, 226}, {161, 193, 227}, { 97, 185, 222},
+                        { 31, 158, 204}, { 16, 122, 165}, { 17,  84, 112},
+                    },
+                    {  // band 2
+                        { 26, 202, 229}, { 11, 165, 210}, {  2, 103, 152},
+                        {  1,  68, 104}, {  1,  42,  70}, {  1,  16,  36},
+                    },
+                    {  // band 3
+                        { 24, 209, 237}, {  6, 169, 214}, {  1, 102, 154},
+                        {  1,  65, 107}, {  1,  45,  68}, {  1,  17,  24},
+                    },
+                    {  // band 4
+                        { 19, 219, 243}, {  4, 183, 226}, {  1, 115, 172},
+                        {  1,  73, 119}, {  1,  43,  77}, {  1,  15,  37},
+                    },
+                    {  // band 5
+                        { 11, 237, 241}, {  2, 190, 216}, {  1, 108, 146},
+                        {  1,  59,  94}, {  1,  40,  67}, {  1,  30,  53},
+                    },
+                },
+            },
+        },
+    },
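+    /* Layout note (assuming the VP9-style three-node coefficient model
+     * that this table appears to follow): each {p0, p1, p2} triple holds
+     * the probabilities for the first three token-tree nodes (EOB, ZERO,
+     * ONE); probabilities for larger tokens are derived from the pivot
+     * p2 via the Pareto model table. Band 0 uses only three coefficient
+     * contexts, so its trailing {128, 128, 128} entries are unused
+     * placeholders; {128, 128, 128} rows elsewhere presumably mark
+     * contexts the training data never reached. */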
+    {  // Q_Index 1
+        {  // TX_SIZE 0
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        {174,  30, 159}, { 76,  38, 115}, { 15,  33,  65},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 60,  80, 153}, { 72,  75, 147}, { 36,  68, 129},
+                        { 15,  59, 104}, {  4,  45,  74}, {  1,  28,  45},
+                    },
+                    {  // band 2
+                        { 70, 122, 186}, { 55, 104, 175}, { 21,  83, 144},
+                        {  8,  67, 112}, {  2,  51,  82}, {  1,  34,  57},
+                    },
+                    {  // band 3
+                        { 97, 144, 207}, { 52, 109, 195}, { 16,  77, 153},
+                        {  4,  58, 113}, {  1,  43,  77}, {  1,  27,  48},
+                    },
+                    {  // band 4
+                        {128, 148, 229}, { 76, 104, 210}, { 18,  77, 159},
+                        {  4,  65, 110}, {  1,  52,  82}, {  1,  31,  55},
+                    },
+                    {  // band 5
+                        {165,  51, 238}, {128,  50, 230}, { 57,  49, 185},
+                        { 28,  47, 130}, { 12,  44,  96}, {  3,  36,  60},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {169, 103, 203}, {117,  96, 176}, { 56,  81, 137},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 31, 150, 224}, { 49, 128, 212}, { 19,  92, 165},
+                        {  6,  67, 116}, {  2,  43,  71}, {  1,  21,  36},
+                    },
+                    {  // band 2
+                        { 58, 156, 230}, { 47, 130, 215}, {  7,  87, 158},
+                        {  2,  63, 114}, {  1,  39,  71}, {  1,  18,  36},
+                    },
+                    {  // band 3
+                        { 85, 176, 240}, { 43, 138, 226}, {  8,  93, 172},
+                        {  2,  70, 127}, {  1,  46,  81}, {  1,  26,  47},
+                    },
+                    {  // band 4
+                        {155, 144, 248}, { 93, 116, 235}, { 21,  83, 180},
+                        {  4,  59, 119}, {  1,  43,  80}, {  1,  25,  50},
+                    },
+                    {  // band 5
+                        {203,  61, 250}, {171,  57, 243}, { 71,  57, 199},
+                        { 31,  49, 144}, { 13,  42,  96}, {  7,  30,  52},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {204,  44, 204}, {137,  57, 184}, { 72,  62, 152},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {145, 117, 236}, {151, 112, 231}, { 87,  95, 208},
+                        { 31,  77, 165}, {  5,  49,  98}, {  1,  24,  39},
+                    },
+                    {  // band 2
+                        {146, 152, 241}, {140, 132, 236}, { 41, 103, 209},
+                        { 10,  86, 165}, {  2,  55, 106}, {  1,  25,  58},
+                    },
+                    {  // band 3
+                        {154, 181, 249}, { 84, 143, 240}, { 23, 114, 210},
+                        {  6, 102, 182}, {  2,  71, 137}, {  1,  35,  90},
+                    },
+                    {  // band 4
+                        {184, 150, 251}, {115, 130, 244}, { 34, 105, 215},
+                        { 15,  89, 173}, {  1,  51, 141}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {211,  71, 253}, {193,  78, 249}, {106,  91, 232},
+                        { 61,  87, 198}, { 85, 153, 254}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {232, 104, 242}, {165, 114, 227}, { 96, 120, 206},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {137, 178, 250}, {146, 153, 245}, { 74, 108, 205},
+                        { 41,  81, 149}, { 24,  55, 104}, { 13,  36,  68},
+                    },
+                    {  // band 2
+                        {147, 185, 252}, {127, 161, 246}, { 30, 104, 208},
+                        { 11,  74, 154}, {  6,  54, 100}, {  2,  29,  63},
+                    },
+                    {  // band 3
+                        {163, 191, 254}, {101, 161, 249}, { 22, 114, 215},
+                        {  6,  89, 173}, {  1,  65, 120}, {  1,   1, 170},
+                    },
+                    {  // band 4
+                        {197, 160, 254}, {142, 141, 251}, { 39, 102, 218},
+                        { 10,  76, 158}, {  1,  56, 122}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {224,  76, 254}, {215,  84, 253}, {107,  85, 232},
+                        { 43,  71, 177}, {  1,   1, 254}, {128, 128, 128},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 1
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        { 68,  37, 120}, { 21,  34,  82}, {  5,  26,  49},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 41,  89, 138}, { 56,  83, 132}, { 31,  73, 115},
+                        { 16,  62,  92}, {  5,  45,  62}, {  1,  24,  32},
+                    },
+                    {  // band 2
+                        { 48, 139, 165}, { 30, 114, 160}, { 13,  92, 132},
+                        {  6,  72, 103}, {  3,  49,  72}, {  1,  26,  41},
+                    },
+                    {  // band 3
+                        { 44, 162, 191}, { 20, 127, 175}, {  5,  90, 137},
+                        {  1,  62, 100}, {  1,  38,  63}, {  1,  20,  32},
+                    },
+                    {  // band 4
+                        { 51, 184, 213}, { 16, 137, 193}, {  2,  89, 143},
+                        {  1,  60, 102}, {  1,  39,  66}, {  1,  23,  37},
+                    },
+                    {  // band 5
+                        { 76, 200, 235}, { 27, 150, 216}, {  3,  99, 164},
+                        {  1,  70, 119}, {  1,  45,  77}, {  1,  22,  38},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        { 81, 112, 199}, { 49, 101, 164}, { 19,  80, 119},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 12, 181, 217}, { 48, 151, 212}, { 38, 118, 180},
+                        { 22,  95, 140}, { 11,  67,  92}, { 13,  46,  44},
+                    },
+                    {  // band 2
+                        { 29, 188, 226}, { 19, 147, 210}, {  5,  95, 154},
+                        {  4,  68, 106}, {  3,  44,  60}, {  1,  24,  27},
+                    },
+                    {  // band 3
+                        { 30, 195, 234}, { 15, 153, 216}, {  3,  95, 156},
+                        {  2,  66, 108}, {  2,  44,  62}, {  1,  24,  29},
+                    },
+                    {  // band 4
+                        { 36, 203, 243}, { 12, 162, 225}, {  2,  98, 163},
+                        {  2,  67, 113}, {  2,  45,  68}, {  1,  24,  34},
+                    },
+                    {  // band 5
+                        { 86, 207, 248}, { 35, 165, 236}, {  3, 107, 180},
+                        {  1,  73, 128}, {  1,  45,  78}, {  1,  20,  34},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {188,  37, 205}, {118,  51, 172}, { 56,  57, 135},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {116, 135, 225}, {144, 123, 221}, { 72, 103, 197},
+                        { 35,  77, 153}, { 15,  47,  82}, {  6,  25,  34},
+                    },
+                    {  // band 2
+                        {128, 171, 233}, { 82, 142, 226}, { 31, 106, 191},
+                        { 16,  82, 146}, {  9,  59,  98}, {  4,  33,  54},
+                    },
+                    {  // band 3
+                        {126, 197, 241}, { 66, 155, 230}, { 18, 108, 190},
+                        {  7,  82, 148}, {  3,  58,  98}, {  1,  25,  50},
+                    },
+                    {  // band 4
+                        {117, 207, 244}, { 44, 163, 233}, {  9, 112, 191},
+                        {  5,  84, 148}, {  3,  61,  87}, {  1,  28,  38},
+                    },
+                    {  // band 5
+                        {112, 214, 249}, { 39, 174, 240}, {  6, 125, 205},
+                        {  4,  96, 163}, {  5,  66, 100}, {  1, 128, 254},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {227,  70, 234}, {145,  91, 213}, { 61, 100, 173},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {108, 198, 243}, {171, 172, 240}, {118, 130, 210},
+                        {104, 107, 165}, { 64,  85, 114}, { 55,  64,  60},
+                    },
+                    {  // band 2
+                        {110, 208, 247}, { 64, 175, 237}, { 24, 112, 187},
+                        { 24,  81, 133}, { 24,  63,  83}, { 21,  47,  53},
+                    },
+                    {  // band 3
+                        { 91, 218, 249}, { 46, 188, 238}, {  8, 113, 184},
+                        {  5,  83, 137}, {  6,  62,  95}, { 17,  44,  94},
+                    },
+                    {  // band 4
+                        { 84, 216, 248}, { 30, 187, 237}, {  2, 117, 188},
+                        {  1,  88, 141}, {  3,  63,  98}, {  1,   1,   1},
+                    },
+                    {  // band 5
+                        {116, 218, 252}, { 47, 186, 242}, {  2, 132, 204},
+                        {  1, 106, 175}, {  1,  88, 104}, {  1, 254, 128},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 2
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        { 35,  41, 129}, { 12,  30,  70}, {  2,  19,  32},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 30,  77, 116}, { 39,  70, 110}, { 20,  58,  96},
+                        {  8,  47,  77}, {  2,  33,  52}, {  1,  17,  26},
+                    },
+                    {  // band 2
+                        { 31, 123, 146}, { 18, 103, 140}, {  7,  81, 119},
+                        {  2,  62,  95}, {  1,  44,  70}, {  1,  26,  42},
+                    },
+                    {  // band 3
+                        { 21, 149, 170}, {  9, 114, 158}, {  2,  80, 126},
+                        {  1,  57,  94}, {  1,  36,  61}, {  1,  18,  31},
+                    },
+                    {  // band 4
+                        { 20, 178, 199}, {  6, 134, 183}, {  1,  87, 139},
+                        {  1,  60, 100}, {  1,  37,  64}, {  1,  18,  31},
+                    },
+                    {  // band 5
+                        { 36, 218, 233}, {  6, 160, 207}, {  1,  92, 147},
+                        {  1,  59, 101}, {  1,  35,  62}, {  1,  18,  31},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        { 17,  62, 211}, { 14,  62, 153}, {  5,  50,  84},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 11, 180, 205}, { 87, 160, 205}, { 53, 128, 184},
+                        { 27, 106, 156}, { 13,  79, 115}, {  6,  46,  67},
+                    },
+                    {  // band 2
+                        { 32, 194, 220}, { 20, 145, 202}, {  4,  96, 152},
+                        {  1,  67, 111}, {  1,  42,  70}, {  1,  21,  37},
+                    },
+                    {  // band 3
+                        { 30, 204, 228}, { 14, 152, 207}, {  1,  92, 149},
+                        {  1,  61, 103}, {  1,  34,  59}, {  1,  16,  28},
+                    },
+                    {  // band 4
+                        { 27, 213, 235}, {  7, 159, 210}, {  1,  88, 143},
+                        {  1,  55,  94}, {  1,  31,  53}, {  1,  16,  27},
+                    },
+                    {  // band 5
+                        { 28, 223, 243}, {  4, 173, 217}, {  1,  91, 146},
+                        {  1,  58,  98}, {  1,  35,  60}, {  1,  19,  33},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {172,  37, 202}, { 83,  51, 156}, { 24,  53, 110},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 76, 134, 206}, {110, 124, 200}, { 47, 106, 180},
+                        { 15,  82, 145}, {  3,  48,  83}, {  1,  19,  32},
+                    },
+                    {  // band 2
+                        { 80, 176, 220}, { 49, 145, 212}, { 17, 112, 180},
+                        {  7,  84, 140}, {  1,  53,  89}, {  1,  27,  43},
+                    },
+                    {  // band 3
+                        { 74, 201, 232}, { 38, 158, 221}, {  8, 112, 179},
+                        {  2,  79, 132}, {  1,  47,  82}, {  1,  26,  42},
+                    },
+                    {  // band 4
+                        { 73, 215, 239}, { 28, 169, 227}, {  3, 112, 176},
+                        {  1,  74, 126}, {  1,  48,  79}, {  1,  27,  44},
+                    },
+                    {  // band 5
+                        { 71, 233, 244}, { 18, 180, 230}, {  1, 114, 180},
+                        {  1,  80, 134}, {  1,  51,  85}, {  1,  26,  36},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {213,  34, 244}, {126,  57, 212}, { 46,  67, 151},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {120, 202, 245}, {198, 173, 241}, {119, 146, 224},
+                        { 76, 126, 195}, { 44, 102, 159}, { 40,  76, 115},
+                    },
+                    {  // band 2
+                        {120, 215, 248}, { 69, 171, 237}, { 23, 119, 194},
+                        { 10,  86, 147}, {  2,  56,  94}, {  1,  25,  44},
+                    },
+                    {  // band 3
+                        {102, 226, 250}, { 53, 183, 239}, {  9, 118, 188},
+                        {  2,  78, 131}, {  1,  48,  89}, {  1,  17,  36},
+                    },
+                    {  // band 4
+                        { 86, 235, 252}, { 34, 194, 240}, {  2, 109, 173},
+                        {  1,  68, 118}, {  1,  44,  79}, {  1,   1,  38},
+                    },
+                    {  // band 5
+                        { 59, 236, 243}, { 11, 189, 228}, {  1, 112, 187},
+                        {  1,  88, 145}, {  1,  55,  92}, {  1,   1, 128},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 3
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        { 41,  40, 104}, { 12,  31,  64}, {  2,  16,  28},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 65,  58, 132}, { 50,  61, 130}, { 40,  57, 116},
+                        { 22,  46,  87}, {  2,  28,  44}, {  1,  11,  17},
+                    },
+                    {  // band 2
+                        { 55, 139, 135}, { 46, 122, 132}, { 21,  89, 110},
+                        {  6,  60,  78}, {  1,  38,  54}, {  1,  17,  27},
+                    },
+                    {  // band 3
+                        { 29, 167, 161}, { 10, 120, 141}, {  1,  69,  98},
+                        {  1,  42,  66}, {  1,  28,  44}, {  1,  15,  24},
+                    },
+                    {  // band 4
+                        { 19, 191, 180}, {  4, 125, 154}, {  1,  70, 107},
+                        {  1,  48,  77}, {  1,  33,  53}, {  1,  17,  28},
+                    },
+                    {  // band 5
+                        { 16, 238, 231}, {  2, 163, 198}, {  1,  85, 134},
+                        {  1,  54,  90}, {  1,  34,  57}, {  1,  17,  29},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        { 70,  15, 216}, { 40,  18, 164}, { 14,  17,  83},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 25, 150, 200}, {185, 154, 211}, {123, 137, 199},
+                        { 67, 119, 177}, { 31,  96, 137}, { 18,  63,  86},
+                    },
+                    {  // band 2
+                        { 57, 187, 223}, { 35, 148, 207}, {  7, 104, 159},
+                        {  2,  72, 113}, {  1,  44,  71}, {  1,  20,  34},
+                    },
+                    {  // band 3
+                        { 44, 203, 233}, { 18, 157, 212}, {  1,  98, 150},
+                        {  1,  61, 102}, {  1,  38,  62}, {  1,  19,  31},
+                    },
+                    {  // band 4
+                        { 41, 215, 238}, { 11, 166, 215}, {  1,  94, 146},
+                        {  1,  60, 101}, {  1,  37,  63}, {  1,  17,  28},
+                    },
+                    {  // band 5
+                        { 19, 236, 246}, {  3, 188, 223}, {  1,  95, 146},
+                        {  1,  58,  95}, {  1,  34,  56}, {  1,  17,  27},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {146,  27, 156}, { 49,  32, 116}, { 10,  39,  77},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 47, 101, 172}, { 93, 100, 178}, { 58,  91, 165},
+                        { 26,  75, 134}, {  4,  49,  82}, {  2,  22,  33},
+                    },
+                    {  // band 2
+                        { 60, 158, 196}, { 44, 135, 186}, { 25, 106, 157},
+                        {  8,  81, 124}, {  2,  56,  86}, {  1,  28,  45},
+                    },
+                    {  // band 3
+                        { 44, 169, 212}, { 15, 138, 196}, {  2, 100, 157},
+                        {  1,  74, 119}, {  1,  49,  76}, {  1,  20,  34},
+                    },
+                    {  // band 4
+                        { 38, 199, 231}, { 11, 158, 214}, {  1, 111, 167},
+                        {  1,  76, 122}, {  1,  44,  76}, {  1,  17,  39},
+                    },
+                    {  // band 5
+                        { 40, 236, 246}, { 10, 187, 230}, {  1, 115, 175},
+                        {  1,  74, 122}, {  1,  42,  71}, {  1,  14,  59},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {161,  26, 237}, { 65,  46, 209}, { 21,  46, 161},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 87, 229, 245}, {206, 214, 244}, {148, 186, 236},
+                        { 89, 165, 221}, { 41, 132, 186}, { 37,  93, 141},
+                    },
+                    {  // band 2
+                        { 93, 231, 246}, { 47, 181, 231}, {  8, 117, 188},
+                        {  2,  84, 138}, {  1,  43,  87}, {  1,  27,  41},
+                    },
+                    {  // band 3
+                        { 80, 239, 250}, { 28, 190, 236}, {  1, 119, 183},
+                        {  1,  84, 135}, {  1,  81,  69}, {  1, 102,   1},
+                    },
+                    {  // band 4
+                        { 67, 245, 252}, { 22, 206, 242}, {  1, 130, 195},
+                        {  1,  77, 136}, {  1,  35,  88}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        { 43, 250, 228}, { 31, 185, 204}, {  6, 101, 183},
+                        {  1,  92, 151}, {  1,  84, 137}, {128, 128, 128},
+                    },
+                },
+            },
+        },
+    },
+    {  // Q_Index 2
+        {  // TX_SIZE 0
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        {181,  22, 175}, { 96,  37, 147}, { 35,  41, 105},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 80,  95, 197}, {111,  92, 193}, { 59,  87, 175},
+                        { 29,  79, 150}, { 10,  65, 118}, {  2,  47,  82},
+                    },
+                    {  // band 2
+                        { 90, 141, 216}, { 77, 120, 210}, { 23,  95, 184},
+                        { 11,  81, 151}, {  6,  75, 130}, {  2,  58, 113},
+                    },
+                    {  // band 3
+                        {122, 167, 231}, { 66, 119, 225}, { 26,  87, 189},
+                        {  7,  76, 151}, {  2,  63, 125}, {  1,  59,  77},
+                    },
+                    {  // band 4
+                        {162, 147, 244}, {110,  97, 236}, { 32,  88, 204},
+                        { 11,  89, 174}, {  5,  78, 151}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {205,  59, 251}, {176,  68, 248}, { 90,  71, 223},
+                        { 49,  72, 188}, { 17,  74, 203}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {188,  70, 207}, {140,  73, 189}, { 85,  73, 163},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 59, 144, 239}, { 79, 126, 237}, { 31, 102, 202},
+                        { 10,  81, 153}, {  3,  56, 102}, {  2,  33,  59},
+                    },
+                    {  // band 2
+                        {100, 152, 243}, { 80, 129, 236}, { 14,  94, 194},
+                        {  4,  72, 150}, {  1,  50, 103}, {  1,  35,  60},
+                    },
+                    {  // band 3
+                        {130, 183, 247}, { 70, 139, 242}, { 19, 100, 203},
+                        {  4,  83, 159}, {  1,  59, 119}, {  1,  44,  72},
+                    },
+                    {  // band 4
+                        {197, 138, 252}, {135, 107, 247}, { 31,  86, 210},
+                        {  7,  74, 160}, {  1,  53, 107}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {229,  54, 254}, {200,  51, 251}, { 83,  61, 226},
+                        { 33,  55, 177}, { 12,  74, 145}, {128, 128, 128},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {229,  20, 235}, {183,  37, 221}, {127,  47, 198},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {188, 115, 251}, {208, 110, 250}, {101,  99, 235},
+                        { 38,  81, 197}, {  9,  56, 132}, {  9,  52,  63},
+                    },
+                    {  // band 2
+                        {189, 150, 252}, {186, 137, 251}, { 54, 107, 236},
+                        { 14,  90, 195}, {  1,  89, 104}, {128, 128, 128},
+                    },
+                    {  // band 3
+                        {209, 180, 254}, {142, 145, 253}, { 51, 130, 236},
+                        {  6, 128, 214}, {  1, 128, 254}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {231, 140, 254}, {194, 128, 254}, { 75, 119, 233},
+                        {128,  23, 230}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {244,  59, 254}, {239,  81, 254}, {128,  85, 254},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {246,  55, 247}, {197,  64, 235}, {141,  74, 218},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {178, 163, 254}, {192, 138, 252}, { 85, 103, 231},
+                        { 49,  81, 179}, { 32,  54, 133}, { 12,  26,  98},
+                    },
+                    {  // band 2
+                        {189, 173, 254}, {179, 150, 253}, { 60,  94, 237},
+                        { 34,  81, 198}, { 20,  53, 187}, {128, 128, 128},
+                    },
+                    {  // band 3
+                        {202, 191, 254}, {157, 160, 254}, { 57, 117, 240},
+                        { 28, 105, 211}, {  1, 128,   1}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {231, 146, 254}, {208, 133, 254}, { 66,  78, 233},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {246,  49, 254}, {246,  63, 254}, { 85, 142, 254},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 1
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        { 45,  28, 124}, { 23,  35, 107}, { 10,  34,  78},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 53,  99, 177}, { 82,  96, 174}, { 46,  89, 158},
+                        { 21,  76, 133}, {  6,  56,  94}, {  1,  33,  54},
+                    },
+                    {  // band 2
+                        { 68, 147, 201}, { 42, 124, 195}, { 17,  98, 166},
+                        {  7,  75, 131}, {  2,  53,  93}, {  1,  33,  59},
+                    },
+                    {  // band 3
+                        { 65, 176, 217}, { 30, 137, 206}, {  6,  97, 167},
+                        {  2,  70, 128}, {  1,  47,  88}, {  1,  29,  46},
+                    },
+                    {  // band 4
+                        { 69, 195, 232}, { 24, 146, 218}, {  4, 100, 175},
+                        {  2,  72, 134}, {  1,  51,  93}, {  1,  29,  52},
+                    },
+                    {  // band 5
+                        { 96, 212, 246}, { 39, 158, 234}, {  6, 109, 192},
+                        {  2,  77, 144}, {  1,  50,  95}, {  1,  20,  45},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        { 71,  80, 213}, { 53,  73, 181}, { 25,  66, 141},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 35, 168, 231}, { 91, 150, 229}, { 49, 122, 202},
+                        { 22,  97, 162}, { 10,  68, 108}, {  9,  48,  57},
+                    },
+                    {  // band 2
+                        { 56, 178, 236}, { 32, 148, 225}, {  9,  99, 176},
+                        {  4,  69, 127}, {  2,  44,  78}, {  1,  25,  41},
+                    },
+                    {  // band 3
+                        { 57, 191, 242}, { 27, 155, 230}, {  5, 102, 180},
+                        {  2,  71, 133}, {  1,  44,  78}, {  1,  27,  41},
+                    },
+                    {  // band 4
+                        { 67, 201, 247}, { 24, 162, 237}, {  3, 106, 188},
+                        {  3,  74, 137}, {  1,  46,  85}, {  1,  34,  48},
+                    },
+                    {  // band 5
+                        {111, 210, 251}, { 47, 166, 244}, {  3, 113, 199},
+                        {  2,  77, 146}, {  1,  48,  93}, {  1,  38,  22},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {206,  21, 221}, {150,  36, 195}, { 94,  44, 164},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {147, 128, 239}, {194, 122, 238}, { 95, 104, 220},
+                        { 39,  81, 183}, { 13,  53, 111}, {  3,  24,  49},
+                    },
+                    {  // band 2
+                        {164, 163, 244}, {106, 142, 239}, { 50, 112, 215},
+                        { 26,  90, 177}, { 12,  67, 130}, {  1,   1,  64},
+                    },
+                    {  // band 3
+                        {155, 193, 249}, { 88, 158, 244}, { 26, 124, 220},
+                        { 10,  98, 173}, {  1,  77, 126}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {141, 205, 252}, { 64, 174, 248}, { 17, 124, 221},
+                        { 12,  92, 176}, {  1,  29, 148}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {150, 217, 254}, { 74, 191, 252}, { 30, 144, 215},
+                        {  1, 106, 137}, {128,   1, 128}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {241,  37, 242}, {175,  48, 223}, { 99,  53, 189},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {153, 183, 248}, {212, 156, 247}, {134, 124, 221},
+                        { 88, 103, 184}, { 59,  86, 132}, { 29,  61,  67},
+                    },
+                    {  // band 2
+                        {162, 199, 250}, {106, 167, 247}, { 56, 110, 207},
+                        { 32,  85, 165}, { 16,  71, 130}, {  1,  93, 254},
+                    },
+                    {  // band 3
+                        {143, 213, 252}, { 86, 187, 250}, { 23, 124, 220},
+                        {  7,  95, 176}, {  1, 109, 102}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {130, 219, 254}, { 70, 201, 253}, { 15, 128, 215},
+                        {  1, 101, 201}, {  1,  64, 170}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {155, 219, 254}, {105, 207, 254}, { 28, 155, 229},
+                        {  1, 153, 191}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 2
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        { 18,  26, 117}, { 10,  29,  82}, {  3,  25,  52},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 35,  88, 152}, { 62,  85, 150}, { 36,  77, 137},
+                        { 16,  66, 116}, {  4,  47,  81}, {  1,  26,  44},
+                    },
+                    {  // band 2
+                        { 55, 141, 182}, { 32, 119, 177}, { 12,  93, 154},
+                        {  4,  71, 123}, {  1,  51,  89}, {  1,  32,  56},
+                    },
+                    {  // band 3
+                        { 46, 171, 202}, { 21, 130, 191}, {  5,  91, 154},
+                        {  1,  64, 115}, {  1,  42,  77}, {  1,  25,  41},
+                    },
+                    {  // band 4
+                        { 43, 195, 219}, { 12, 142, 203}, {  1,  91, 156},
+                        {  1,  63, 115}, {  1,  41,  77}, {  1,  22,  43},
+                    },
+                    {  // band 5
+                        { 42, 221, 238}, {  8, 162, 219}, {  1,  98, 167},
+                        {  1,  67, 123}, {  1,  43,  83}, {  1,  25,  38},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        { 16,  51, 216}, { 20,  48, 168}, {  9,  44, 109},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 34, 164, 226}, {124, 148, 226}, { 72, 127, 207},
+                        { 36, 107, 175}, { 15,  81, 129}, {  6,  51,  79},
+                    },
+                    {  // band 2
+                        { 61, 182, 234}, { 35, 148, 220}, {  9, 101, 178},
+                        {  4,  71, 134}, {  1,  46,  90}, {  1,  24,  51},
+                    },
+                    {  // band 3
+                        { 54, 198, 239}, { 25, 156, 224}, {  3,  98, 173},
+                        {  1,  66, 124}, {  1,  41,  78}, {  1,  15,  37},
+                    },
+                    {  // band 4
+                        { 48, 209, 242}, { 12, 162, 226}, {  1,  96, 169},
+                        {  1,  63, 119}, {  1,  40,  78}, {  1,  18,  45},
+                    },
+                    {  // band 5
+                        { 44, 223, 247}, {  6, 173, 232}, {  1, 105, 178},
+                        {  1,  71, 131}, {  1,  44,  84}, {  1,  13,  46},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {188,  26, 214}, {121,  42, 181}, { 66,  49, 149},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {136, 128, 233}, {172, 124, 230}, { 80, 106, 211},
+                        { 27,  81, 174}, {  6,  49,  98}, {  8,  28,  49},
+                    },
+                    {  // band 2
+                        {145, 166, 239}, { 92, 141, 229}, { 28, 108, 196},
+                        {  8,  87, 154}, {  1,  58, 105}, {  1,  27,  59},
+                    },
+                    {  // band 3
+                        {131, 193, 242}, { 66, 151, 231}, { 13, 112, 192},
+                        {  2,  81, 152}, {  1,  66, 121}, {  1,  23,  64},
+                    },
+                    {  // band 4
+                        {112, 211, 246}, { 41, 164, 235}, {  5, 117, 202},
+                        {  1,  83, 162}, {  1,  64, 111}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        { 96, 230, 250}, { 28, 185, 243}, {  2, 132, 204},
+                        {  1,  91, 166}, {  1,  85,  46}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {238,  23, 242}, {157,  29, 215}, { 73,  27, 162},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {165, 173, 250}, {222, 151, 247}, {152, 134, 235},
+                        {114, 120, 210}, { 86, 109, 176}, { 53,  88, 145},
+                    },
+                    {  // band 2
+                        {164, 194, 249}, {100, 158, 241}, { 35, 111, 212},
+                        { 17,  85, 167}, {  1,  52, 112}, {  1,  73,   1},
+                    },
+                    {  // band 3
+                        {151, 215, 252}, { 83, 172, 245}, { 16, 122, 208},
+                        {  6, 101, 165}, {  1,  74, 113}, {  1,   1,   1},
+                    },
+                    {  // band 4
+                        {138, 230, 253}, { 65, 184, 248}, {  8, 128, 212},
+                        {  1, 111, 182}, {128,   1,   1}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {123, 240, 253}, { 36, 201, 250}, {  3, 127, 211},
+                        {  1,  68, 204}, {128,   1,   1}, {128, 128, 128},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 3
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        { 51,  21, 156}, { 30,  23,  86}, {  4,  18,  37},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 38,  77, 129}, { 79,  76, 129}, { 40,  66, 117},
+                        { 12,  54,  95}, {  1,  36,  60}, {  1,  17,  29},
+                    },
+                    {  // band 2
+                        { 44, 133, 149}, { 24, 107, 143}, {  8,  78, 121},
+                        {  3,  59,  97}, {  1,  42,  71}, {  1,  22,  37},
+                    },
+                    {  // band 3
+                        { 29, 160, 171}, {  9, 114, 158}, {  1,  76, 125},
+                        {  1,  54,  93}, {  1,  36,  63}, {  1,  20,  35},
+                    },
+                    {  // band 4
+                        { 22, 188, 205}, {  6, 132, 186}, {  1,  87, 144},
+                        {  1,  62, 107}, {  1,  41,  72}, {  1,  23,  41},
+                    },
+                    {  // band 5
+                        { 25, 233, 236}, {  5, 165, 214}, {  1,  96, 158},
+                        {  1,  63, 112}, {  1,  40,  73}, {  1,  23,  40},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        { 48,  20, 231}, { 37,  21, 179}, { 15,  18, 109},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 41, 154, 216}, {196, 142, 221}, {131, 125, 207},
+                        { 84, 111, 181}, { 45,  91, 142}, { 27,  62,  89},
+                    },
+                    {  // band 2
+                        { 72, 181, 230}, { 41, 147, 215}, { 10, 102, 173},
+                        {  3,  73, 132}, {  1,  47,  89}, {  1,  23,  50},
+                    },
+                    {  // band 3
+                        { 60, 201, 236}, { 23, 157, 219}, {  2,  99, 167},
+                        {  1,  69, 124}, {  1,  43,  80}, {  1,  22,  39},
+                    },
+                    {  // band 4
+                        { 53, 214, 242}, { 15, 165, 224}, {  1, 101, 173},
+                        {  1,  70, 131}, {  1,  44,  83}, {  1,  23,  49},
+                    },
+                    {  // band 5
+                        { 39, 239, 248}, {  7, 186, 233}, {  1, 108, 174},
+                        {  1,  70, 123}, {  1,  43,  77}, {  1,  16,  42},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {161,  26, 204}, { 77,  40, 160}, { 26,  50, 117},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 80, 140, 218}, {136, 133, 215}, { 63, 117, 197},
+                        { 20,  93, 170}, {  7,  55, 102}, { 13,  32,  52},
+                    },
+                    {  // band 2
+                        { 86, 173, 231}, { 46, 150, 220}, { 18, 118, 190},
+                        {  8,  90, 150}, {  2,  60,  95}, {  1,  39,  41},
+                    },
+                    {  // band 3
+                        { 80, 183, 242}, { 37, 160, 231}, {  6, 120, 182},
+                        {  1,  86, 137}, {  1,  46,  78}, {  1,  15,  24},
+                    },
+                    {  // band 4
+                        { 88, 215, 247}, { 42, 179, 235}, {  4, 116, 182},
+                        {  2,  80, 133}, {  1,  46,  85}, {  1,  64,  43},
+                    },
+                    {  // band 5
+                        {100, 236, 250}, { 31, 186, 234}, {  1, 114, 181},
+                        {  1,  85, 135}, {  1,  78,  64}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {213,  13, 245}, {106,  16, 211}, { 32,  11, 156},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {140, 214, 247}, {241, 186, 243}, {177, 172, 235},
+                        {128, 156, 219}, {106, 130, 191}, { 99, 105, 152},
+                    },
+                    {  // band 2
+                        {125, 218, 248}, { 75, 167, 239}, { 29, 111, 212},
+                        {  6,  66, 152}, {  1,  42,  96}, {  1,  85, 128},
+                    },
+                    {  // band 3
+                        {120, 232, 252}, { 60, 189, 247}, {  8, 141, 200},
+                        {  1,  89, 134}, {  1,  32, 128}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {111, 238, 253}, { 56, 198, 245}, {  1, 123, 208},
+                        {  1,  93, 176}, {  1,   1,  73}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        { 98, 251, 249}, { 56, 189, 244}, { 17, 113, 220},
+                        {  1, 109, 179}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+            },
+        },
+    },
+    {  // Q_Index 3
+        {  // TX_SIZE 0
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        {186,  16, 200}, {122,  31, 187}, { 78,  40, 161},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {146, 119, 245}, {182, 115, 244}, {130, 113, 238},
+                        { 88, 110, 225}, { 47, 103, 208}, {  5, 102, 188},
+                    },
+                    {  // band 2
+                        {164, 157, 248}, {155, 141, 250}, { 71, 116, 243},
+                        { 88, 129, 233}, { 50,  99, 228}, { 26, 148, 191},
+                    },
+                    {  // band 3
+                        {200, 158, 253}, {177, 118, 252}, { 99, 113, 245},
+                        { 77, 120, 210}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {231, 104, 254}, {209,  82, 254}, {143, 112, 252},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {250,  36, 254}, {243,  55, 254}, {223, 170, 254},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {207,  37, 226}, {164,  46, 218}, {122,  58, 201},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {149, 154, 253}, {170, 137, 253}, { 94, 123, 247},
+                        { 42, 113, 222}, { 16,  97, 174}, { 49,  98, 159},
+                    },
+                    {  // band 2
+                        {177, 162, 253}, {165, 142, 252}, { 51, 108, 243},
+                        { 18, 108, 213}, {  1,  98, 254}, {128, 128, 128},
+                    },
+                    {  // band 3
+                        {211, 152, 254}, {184, 116, 254}, { 70, 110, 244},
+                        {  8, 108, 237}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {236,  89, 254}, {210,  67, 254}, {112, 111, 248},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {246,  26, 254}, {233,  35, 254}, {128,   1, 254},
+                        {254, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {247,   2, 247}, {226,   8, 242}, {191,  14, 235},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {231,  94, 254}, {248,  91, 254}, {186,  89, 252},
+                        {128,  92, 244}, { 79, 112, 254}, {128, 128, 128},
+                    },
+                    {  // band 2
+                        {228, 145, 253}, {240, 130, 254}, {223, 105, 254},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 3
+                        {245, 153, 253}, {240, 120, 254}, {128, 128, 128},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {254, 128, 254}, {204, 128, 254}, {128, 128, 128},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {253,   7, 249}, {224,   9, 244}, {182,  13, 231},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {234, 109, 254}, {242, 104, 254}, {160,  98, 254},
+                        {123,  85, 243}, { 82,  43, 217}, {128, 128, 128},
+                    },
+                    {  // band 2
+                        {243, 137, 254}, {240, 118, 254}, {136,  53, 254},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 3
+                        {251, 173, 254}, {229, 129, 250}, {128, 128, 128},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {254, 119, 254}, {254, 128, 128}, {128, 128, 128},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 1
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        { 49,  26, 159}, { 36,  34, 150}, { 26,  38, 124},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 99, 122, 226}, {143, 119, 225}, { 90, 113, 213},
+                        { 46, 102, 193}, { 14,  84, 157}, {  3,  59, 107},
+                    },
+                    {  // band 2
+                        {109, 164, 237}, { 74, 142, 233}, { 29, 112, 216},
+                        { 14,  92, 184}, { 10,  80, 156}, {  1,  52, 137},
+                    },
+                    {  // band 3
+                        {110, 191, 245}, { 59, 156, 240}, { 18, 121, 220},
+                        {  8,  97, 184}, {  3,  84, 150}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {115, 203, 250}, { 59, 167, 246}, { 16, 130, 226},
+                        {  7,  97, 192}, {  1,  71,  99}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {149, 218, 253}, { 93, 171, 251}, { 28, 125, 233},
+                        { 28,  99, 192}, {128,  85,  85}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        { 97,  45, 229}, { 79,  52, 205}, { 46,  58, 171},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 99, 180, 249}, {156, 165, 249}, { 73, 141, 237},
+                        { 31, 116, 208}, { 13,  81, 153}, {  5,  42,  86},
+                    },
+                    {  // band 2
+                        {113, 188, 251}, { 68, 161, 244}, { 16, 108, 216},
+                        {  6,  81, 168}, {  2,  65, 118}, {128,   1,   1},
+                    },
+                    {  // band 3
+                        {117, 201, 252}, { 62, 171, 248}, { 12, 119, 221},
+                        {  5,  90, 182}, {  4,  66, 116}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {128, 207, 253}, { 70, 176, 251}, { 11, 126, 228},
+                        {  6,  89, 189}, {  1,  44, 148}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {162, 218, 254}, {107, 170, 253}, { 22, 131, 238},
+                        {  1,  77, 182}, {  1, 254, 128}, {128, 128, 128},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {235,   5, 238}, {194,  14, 223}, {152,  22, 205},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {200, 121, 251}, {241, 115, 252}, {167, 108, 248},
+                        { 93,  93, 233}, { 36,  66, 189}, {128, 128, 128},
+                    },
+                    {  // band 2
+                        {220, 151, 253}, {176, 135, 252}, { 95, 124, 254},
+                        { 64, 105, 217}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 3
+                        {225, 189, 254}, {175, 155, 254}, {102, 119, 254},
+                        {  1,   1,   1}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {218, 195, 254}, {125, 157, 253}, {128, 128, 254},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {221, 197, 254}, { 85, 210, 254}, {128, 128, 128},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {250,   9, 246}, {204,  13, 234}, {144,  18, 211},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {213, 157, 253}, {243, 138, 253}, {170, 117, 250},
+                        {109,  91, 233}, { 66,  77, 163}, { 64,  85, 254},
+                    },
+                    {  // band 2
+                        {221, 169, 254}, {182, 141, 253}, {112, 120, 239},
+                        { 85, 165, 254}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 3
+                        {226, 192, 254}, {189, 174, 251}, {153, 128, 254},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {232, 192, 254}, {195, 187, 247}, {  1, 191, 254},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {247, 185, 254}, {254,  93, 254}, {128, 128, 128},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 2
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        { 14,  30, 136}, { 15,  33, 120}, { 10,  33,  90},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 92, 109, 209}, {113, 108, 207}, { 77, 102, 193},
+                        { 39,  91, 171}, { 11,  70, 129}, {  2,  44,  77},
+                    },
+                    {  // band 2
+                        { 99, 158, 223}, { 66, 135, 217}, { 23, 109, 194},
+                        {  9,  85, 160}, {  3,  66, 124}, {  1,  51, 100},
+                    },
+                    {  // band 3
+                        { 89, 189, 234}, { 46, 149, 225}, { 10, 110, 194},
+                        {  2,  83, 156}, {  1,  57, 113}, {  1,  47,  73},
+                    },
+                    {  // band 4
+                        { 78, 206, 242}, { 28, 161, 232}, {  3, 114, 200},
+                        {  1,  86, 161}, {  1,  62, 118}, {  1,   1,   1},
+                    },
+                    {  // band 5
+                        { 72, 227, 250}, { 20, 182, 242}, {  3, 126, 210},
+                        {  2,  91, 166}, {  1,  64, 126}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        { 23,  42, 227}, { 41,  43, 195}, { 25,  45, 146},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {100, 172, 245}, {165, 158, 246}, { 88, 137, 234},
+                        { 44, 116, 203}, { 18,  85, 149}, {  7,  56,  92},
+                    },
+                    {  // band 2
+                        {117, 188, 247}, { 70, 155, 239}, { 18, 105, 204},
+                        {  7,  78, 158}, {  2,  50, 111}, {  1,  38,  77},
+                    },
+                    {  // band 3
+                        {104, 207, 250}, { 54, 166, 241}, {  6, 110, 199},
+                        {  1,  78, 155}, {  1,  45, 100}, {  1,   1,   1},
+                    },
+                    {  // band 4
+                        { 87, 216, 251}, { 30, 177, 243}, {  1, 114, 203},
+                        {  1,  85, 157}, {  1,  53, 108}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        { 80, 230, 253}, { 23, 193, 248}, {  1, 127, 215},
+                        {  1,  94, 170}, {  1,  71,  59}, {128, 128, 128},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {222,   9, 234}, {161,  20, 210}, {113,  30, 185},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {195, 120, 248}, {231, 124, 247}, {148, 116, 238},
+                        { 64,  98, 207}, { 20,  70, 147}, { 87,  68, 100},
+                    },
+                    {  // band 2
+                        {186, 161, 250}, {124, 148, 245}, { 44, 123, 230},
+                        { 23, 107, 205}, {  1,  80, 131}, {128, 128, 128},
+                    },
+                    {  // band 3
+                        {172, 196, 252}, {110, 160, 248}, { 37, 134, 235},
+                        { 23, 125, 200}, {128, 254, 128}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {173, 209, 253}, {103, 175, 250}, {  1, 120, 240},
+                        {  1, 146, 254}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {184, 235, 254}, { 81, 186, 251}, {128, 109, 254},
+                        {128, 254, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {248,   8, 243}, {185,  11, 225}, {108,  11, 189},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {208, 158, 254}, {244, 147, 252}, {195, 132, 248},
+                        {161, 122, 224}, {129, 114, 188}, { 59, 119, 159},
+                    },
+                    {  // band 2
+                        {202, 182, 253}, {143, 161, 251}, { 73, 115, 247},
+                        {146, 175, 204}, {128,   1, 254}, {128, 128, 128},
+                    },
+                    {  // band 3
+                        {202, 204, 254}, {131, 174, 251}, { 18, 153, 207},
+                        {128, 254, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {192, 221, 254}, {114, 190, 254}, {128, 170, 254},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {166, 236, 254}, {119, 200, 254}, {128, 128, 128},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+            },
+        },
+        {  // TX_SIZE 3
+            {  // Y plane
+                {  // Intra
+                    {  // band 0
+                        { 30,  32, 144}, { 21,  35,  96}, {  4,  27,  55},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 35, 107, 172}, { 61, 104, 170}, { 33,  94, 160},
+                        { 13,  80, 139}, {  2,  55,  97}, {  1,  28,  49},
+                    },
+                    {  // band 2
+                        { 51, 153, 195}, { 29, 129, 189}, {  9,  99, 163},
+                        {  3,  75, 129}, {  1,  49,  88}, {  1,  29,  50},
+                    },
+                    {  // band 3
+                        { 53, 164, 210}, { 21, 134, 201}, {  3,  97, 164},
+                        {  1,  69, 124}, {  1,  45,  82}, {  1,  31,  58},
+                    },
+                    {  // band 4
+                        { 47, 205, 234}, { 18, 158, 220}, {  2, 109, 177},
+                        {  1,  78, 137}, {  1,  53, 101}, {  1,  34,  70},
+                    },
+                    {  // band 5
+                        { 55, 233, 245}, { 16, 179, 233}, {  1, 116, 191},
+                        {  1,  79, 145}, {  1,  53, 101}, {  1,  37,  58},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        { 36,  33, 227}, { 39,  28, 190}, { 18,  27, 134},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        { 76, 156, 235}, {184, 147, 235}, {114, 130, 220},
+                        { 72, 112, 191}, { 42,  87, 144}, { 21,  65,  93},
+                    },
+                    {  // band 2
+                        { 96, 179, 240}, { 51, 149, 228}, { 12, 105, 191},
+                        {  6,  74, 148}, {  1,  47, 100}, {  1,  29,  53},
+                    },
+                    {  // band 3
+                        { 88, 191, 242}, { 35, 154, 231}, {  3, 106, 187},
+                        {  1,  74, 140}, {  1,  41,  84}, {  1,  25,  38},
+                    },
+                    {  // band 4
+                        { 77, 212, 249}, { 28, 171, 239}, {  2, 117, 199},
+                        {  1,  79, 151}, {  1,  45,  99}, {  1,   1,   1},
+                    },
+                    {  // band 5
+                        { 77, 236, 252}, { 27, 190, 246}, {  2, 120, 203},
+                        {  1,  78, 147}, {  1,  42,  72}, {128, 128, 128},
+                    },
+                },
+            },
+            {  // UV plane
+                {  // Intra
+                    {  // band 0
+                        {185,  11, 227}, {113,  30, 182}, { 57,  44, 144},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {151, 139, 244}, {212, 139, 241}, {124, 126, 231},
+                        { 59, 104, 213}, { 26,  73, 158}, { 20,  45,  95},
+                    },
+                    {  // band 2
+                        {155, 163, 247}, {108, 152, 239}, { 39, 124, 214},
+                        {  7, 109, 162}, { 29,  57, 128}, {128, 128, 128},
+                    },
+                    {  // band 3
+                        {158, 176, 250}, { 89, 164, 243}, { 11, 114, 196},
+                        {  1,  96, 141}, {  1,  81, 118}, {128,   1,   1},
+                    },
+                    {  // band 4
+                        {148, 212, 251}, { 59, 174, 240}, {  2, 130, 203},
+                        {  1,  70, 168}, {  1,  51, 106}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {104, 237, 252}, { 39, 190, 246}, {  1, 154, 220},
+                        {128, 102,   1}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+                {  // Inter
+                    {  // band 0
+                        {236,   6, 242}, {111,   6, 206}, { 36,   5, 161},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 1
+                        {193, 193, 252}, {248, 182, 251}, {218, 150, 246},
+                        {182, 134, 244}, {151, 137, 227}, { 45, 102, 195},
+                    },
+                    {  // band 2
+                        {188, 202, 251}, {125, 165, 249}, { 64,  75, 218},
+                        {  1, 128, 254}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 3
+                        {178, 225, 254}, {107, 188, 231}, { 21, 135, 233},
+                        {128,   1, 254}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 4
+                        {164, 227, 253}, { 55, 193, 251}, {  1, 111, 225},
+                        {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+                    },
+                    {  // band 5
+                        {151, 243, 254}, { 50, 203, 254}, {128, 179, 254},
+                        {128,   1, 254}, {128, 128, 128}, {128, 128, 128},
+                    },
+                },
+            },
+        },
+    },
+};
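+/*
+ * Note on the table above: it supplies one complete set of default
+ * coefficient probabilities per quantizer bin (Q_Index 0..3), ordered by
+ * transform size, plane type (Y/UV), reference type (Intra/Inter) and
+ * coefficient band.  The trailing {128, 128, 128} rows in band 0 are never
+ * read, since band 0 uses only BAND_COEFF_CONTEXTS(0) == 3 of the 6
+ * coefficient contexts.
+ */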
+#else
+static const vp10_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = {
+  {  // Y plane
+    {  // Intra
+      {  // Band 0
+        { 195,  29, 183 }, {  84,  49, 136 }, {   8,  42,  71 }
+      }, {  // Band 1
+        {  31, 107, 169 }, {  35,  99, 159 }, {  17,  82, 140 },
+        {   8,  66, 114 }, {   2,  44,  76 }, {   1,  19,  32 }
+      }, {  // Band 2
+        {  40, 132, 201 }, {  29, 114, 187 }, {  13,  91, 157 },
+        {   7,  75, 127 }, {   3,  58,  95 }, {   1,  28,  47 }
+      }, {  // Band 3
+        {  69, 142, 221 }, {  42, 122, 201 }, {  15,  91, 159 },
+        {   6,  67, 121 }, {   1,  42,  77 }, {   1,  17,  31 }
+      }, {  // Band 4
+        { 102, 148, 228 }, {  67, 117, 204 }, {  17,  82, 154 },
+        {   6,  59, 114 }, {   2,  39,  75 }, {   1,  15,  29 }
+      }, {  // Band 5
+        { 156,  57, 233 }, { 119,  57, 212 }, {  58,  48, 163 },
+        {  29,  40, 124 }, {  12,  30,  81 }, {   3,  12,  31 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 191, 107, 226 }, { 124, 117, 204 }, {  25,  99, 155 }
+      }, {  // Band 1
+        {  29, 148, 210 }, {  37, 126, 194 }, {   8,  93, 157 },
+        {   2,  68, 118 }, {   1,  39,  69 }, {   1,  17,  33 }
+      }, {  // Band 2
+        {  41, 151, 213 }, {  27, 123, 193 }, {   3,  82, 144 },
+        {   1,  58, 105 }, {   1,  32,  60 }, {   1,  13,  26 }
+      }, {  // Band 3
+        {  59, 159, 220 }, {  23, 126, 198 }, {   4,  88, 151 },
+        {   1,  66, 114 }, {   1,  38,  71 }, {   1,  18,  34 }
+      }, {  // Band 4
+        { 114, 136, 232 }, {  51, 114, 207 }, {  11,  83, 155 },
+        {   3,  56, 105 }, {   1,  33,  65 }, {   1,  17,  34 }
+      }, {  // Band 5
+        { 149,  65, 234 }, { 121,  57, 215 }, {  61,  49, 166 },
+        {  28,  36, 114 }, {  12,  25,  76 }, {   3,  16,  42 }
+      }
+    }
+  }, {  // UV plane
+    {  // Intra
+      {  // Band 0
+        { 214,  49, 220 }, { 132,  63, 188 }, {  42,  65, 137 }
+      }, {  // Band 1
+        {  85, 137, 221 }, { 104, 131, 216 }, {  49, 111, 192 },
+        {  21,  87, 155 }, {   2,  49,  87 }, {   1,  16,  28 }
+      }, {  // Band 2
+        {  89, 163, 230 }, {  90, 137, 220 }, {  29, 100, 183 },
+        {  10,  70, 135 }, {   2,  42,  81 }, {   1,  17,  33 }
+      }, {  // Band 3
+        { 108, 167, 237 }, {  55, 133, 222 }, {  15,  97, 179 },
+        {   4,  72, 135 }, {   1,  45,  85 }, {   1,  19,  38 }
+      }, {  // Band 4
+        { 124, 146, 240 }, {  66, 124, 224 }, {  17,  88, 175 },
+        {   4,  58, 122 }, {   1,  36,  75 }, {   1,  18,  37 }
+      }, {  // Band 5
+        { 141,  79, 241 }, { 126,  70, 227 }, {  66,  58, 182 },
+        {  30,  44, 136 }, {  12,  34,  96 }, {   2,  20,  47 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 229,  99, 249 }, { 143, 111, 235 }, {  46, 109, 192 }
+      }, {  // Band 1
+        {  82, 158, 236 }, {  94, 146, 224 }, {  25, 117, 191 },
+        {   9,  87, 149 }, {   3,  56,  99 }, {   1,  33,  57 }
+      }, {  // Band 2
+        {  83, 167, 237 }, {  68, 145, 222 }, {  10, 103, 177 },
+        {   2,  72, 131 }, {   1,  41,  79 }, {   1,  20,  39 }
+      }, {  // Band 3
+        {  99, 167, 239 }, {  47, 141, 224 }, {  10, 104, 178 },
+        {   2,  73, 133 }, {   1,  44,  85 }, {   1,  22,  47 }
+      }, {  // Band 4
+        { 127, 145, 243 }, {  71, 129, 228 }, {  17,  93, 177 },
+        {   3,  61, 124 }, {   1,  41,  84 }, {   1,  21,  52 }
+      }, {  // Band 5
+        { 157,  78, 244 }, { 140,  72, 231 }, {  69,  58, 184 },
+        {  31,  44, 137 }, {  14,  38, 105 }, {   8,  23,  61 }
+      }
+    }
+  }
+};
+
+static const vp10_coeff_probs_model default_coef_probs_8x8[PLANE_TYPES] = {
+  {  // Y plane
+    {  // Intra
+      {  // Band 0
+        { 125,  34, 187 }, {  52,  41, 133 }, {   6,  31,  56 }
+      }, {  // Band 1
+        {  37, 109, 153 }, {  51, 102, 147 }, {  23,  87, 128 },
+        {   8,  67, 101 }, {   1,  41,  63 }, {   1,  19,  29 }
+      }, {  // Band 2
+        {  31, 154, 185 }, {  17, 127, 175 }, {   6,  96, 145 },
+        {   2,  73, 114 }, {   1,  51,  82 }, {   1,  28,  45 }
+      }, {  // Band 3
+        {  23, 163, 200 }, {  10, 131, 185 }, {   2,  93, 148 },
+        {   1,  67, 111 }, {   1,  41,  69 }, {   1,  14,  24 }
+      }, {  // Band 4
+        {  29, 176, 217 }, {  12, 145, 201 }, {   3, 101, 156 },
+        {   1,  69, 111 }, {   1,  39,  63 }, {   1,  14,  23 }
+      }, {  // Band 5
+        {  57, 192, 233 }, {  25, 154, 215 }, {   6, 109, 167 },
+        {   3,  78, 118 }, {   1,  48,  69 }, {   1,  21,  29 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 202, 105, 245 }, { 108, 106, 216 }, {  18,  90, 144 }
+      }, {  // Band 1
+        {  33, 172, 219 }, {  64, 149, 206 }, {  14, 117, 177 },
+        {   5,  90, 141 }, {   2,  61,  95 }, {   1,  37,  57 }
+      }, {  // Band 2
+        {  33, 179, 220 }, {  11, 140, 198 }, {   1,  89, 148 },
+        {   1,  60, 104 }, {   1,  33,  57 }, {   1,  12,  21 }
+      }, {  // Band 3
+        {  30, 181, 221 }, {   8, 141, 198 }, {   1,  87, 145 },
+        {   1,  58, 100 }, {   1,  31,  55 }, {   1,  12,  20 }
+      }, {  // Band 4
+        {  32, 186, 224 }, {   7, 142, 198 }, {   1,  86, 143 },
+        {   1,  58, 100 }, {   1,  31,  55 }, {   1,  12,  22 }
+      }, {  // Band 5
+        {  57, 192, 227 }, {  20, 143, 204 }, {   3,  96, 154 },
+        {   1,  68, 112 }, {   1,  42,  69 }, {   1,  19,  32 }
+      }
+    }
+  }, {  // UV plane
+    {  // Intra
+      {  // Band 0
+        { 212,  35, 215 }, { 113,  47, 169 }, {  29,  48, 105 }
+      }, {  // Band 1
+        {  74, 129, 203 }, { 106, 120, 203 }, {  49, 107, 178 },
+        {  19,  84, 144 }, {   4,  50,  84 }, {   1,  15,  25 }
+      }, {  // Band 2
+        {  71, 172, 217 }, {  44, 141, 209 }, {  15, 102, 173 },
+        {   6,  76, 133 }, {   2,  51,  89 }, {   1,  24,  42 }
+      }, {  // Band 3
+        {  64, 185, 231 }, {  31, 148, 216 }, {   8, 103, 175 },
+        {   3,  74, 131 }, {   1,  46,  81 }, {   1,  18,  30 }
+      }, {  // Band 4
+        {  65, 196, 235 }, {  25, 157, 221 }, {   5, 105, 174 },
+        {   1,  67, 120 }, {   1,  38,  69 }, {   1,  15,  30 }
+      }, {  // Band 5
+        {  65, 204, 238 }, {  30, 156, 224 }, {   7, 107, 177 },
+        {   2,  70, 124 }, {   1,  42,  73 }, {   1,  18,  34 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 225,  86, 251 }, { 144, 104, 235 }, {  42,  99, 181 }
+      }, {  // Band 1
+        {  85, 175, 239 }, { 112, 165, 229 }, {  29, 136, 200 },
+        {  12, 103, 162 }, {   6,  77, 123 }, {   2,  53,  84 }
+      }, {  // Band 2
+        {  75, 183, 239 }, {  30, 155, 221 }, {   3, 106, 171 },
+        {   1,  74, 128 }, {   1,  44,  76 }, {   1,  17,  28 }
+      }, {  // Band 3
+        {  73, 185, 240 }, {  27, 159, 222 }, {   2, 107, 172 },
+        {   1,  75, 127 }, {   1,  42,  73 }, {   1,  17,  29 }
+      }, {  // Band 4
+        {  62, 190, 238 }, {  21, 159, 222 }, {   2, 107, 172 },
+        {   1,  72, 122 }, {   1,  40,  71 }, {   1,  18,  32 }
+      }, {  // Band 5
+        {  61, 199, 240 }, {  27, 161, 226 }, {   4, 113, 180 },
+        {   1,  76, 129 }, {   1,  46,  80 }, {   1,  23,  41 }
+      }
+    }
+  }
+};
+
+static const vp10_coeff_probs_model default_coef_probs_16x16[PLANE_TYPES] = {
+  {  // Y plane
+    {  // Intra
+      {  // Band 0
+        {   7,  27, 153 }, {   5,  30,  95 }, {   1,  16,  30 }
+      }, {  // Band 1
+        {  50,  75, 127 }, {  57,  75, 124 }, {  27,  67, 108 },
+        {  10,  54,  86 }, {   1,  33,  52 }, {   1,  12,  18 }
+      }, {  // Band 2
+        {  43, 125, 151 }, {  26, 108, 148 }, {   7,  83, 122 },
+        {   2,  59,  89 }, {   1,  38,  60 }, {   1,  17,  27 }
+      }, {  // Band 3
+        {  23, 144, 163 }, {  13, 112, 154 }, {   2,  75, 117 },
+        {   1,  50,  81 }, {   1,  31,  51 }, {   1,  14,  23 }
+      }, {  // Band 4
+        {  18, 162, 185 }, {   6, 123, 171 }, {   1,  78, 125 },
+        {   1,  51,  86 }, {   1,  31,  54 }, {   1,  14,  23 }
+      }, {  // Band 5
+        {  15, 199, 227 }, {   3, 150, 204 }, {   1,  91, 146 },
+        {   1,  55,  95 }, {   1,  30,  53 }, {   1,  11,  20 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        {  19,  55, 240 }, {  19,  59, 196 }, {   3,  52, 105 }
+      }, {  // Band 1
+        {  41, 166, 207 }, { 104, 153, 199 }, {  31, 123, 181 },
+        {  14, 101, 152 }, {   5,  72, 106 }, {   1,  36,  52 }
+      }, {  // Band 2
+        {  35, 176, 211 }, {  12, 131, 190 }, {   2,  88, 144 },
+        {   1,  60, 101 }, {   1,  36,  60 }, {   1,  16,  28 }
+      }, {  // Band 3
+        {  28, 183, 213 }, {   8, 134, 191 }, {   1,  86, 142 },
+        {   1,  56,  96 }, {   1,  30,  53 }, {   1,  12,  20 }
+      }, {  // Band 4
+        {  20, 190, 215 }, {   4, 135, 192 }, {   1,  84, 139 },
+        {   1,  53,  91 }, {   1,  28,  49 }, {   1,  11,  20 }
+      }, {  // Band 5
+        {  13, 196, 216 }, {   2, 137, 192 }, {   1,  86, 143 },
+        {   1,  57,  99 }, {   1,  32,  56 }, {   1,  13,  24 }
+      }
+    }
+  }, {  // UV plane
+    {  // Intra
+      {  // Band 0
+        { 211,  29, 217 }, {  96,  47, 156 }, {  22,  43,  87 }
+      }, {  // Band 1
+        {  78, 120, 193 }, { 111, 116, 186 }, {  46, 102, 164 },
+        {  15,  80, 128 }, {   2,  49,  76 }, {   1,  18,  28 }
+      }, {  // Band 2
+        {  71, 161, 203 }, {  42, 132, 192 }, {  10,  98, 150 },
+        {   3,  69, 109 }, {   1,  44,  70 }, {   1,  18,  29 }
+      }, {  // Band 3
+        {  57, 186, 211 }, {  30, 140, 196 }, {   4,  93, 146 },
+        {   1,  62, 102 }, {   1,  38,  65 }, {   1,  16,  27 }
+      }, {  // Band 4
+        {  47, 199, 217 }, {  14, 145, 196 }, {   1,  88, 142 },
+        {   1,  57,  98 }, {   1,  36,  62 }, {   1,  15,  26 }
+      }, {  // Band 5
+        {  26, 219, 229 }, {   5, 155, 207 }, {   1,  94, 151 },
+        {   1,  60, 104 }, {   1,  36,  62 }, {   1,  16,  28 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 233,  29, 248 }, { 146,  47, 220 }, {  43,  52, 140 }
+      }, {  // Band 1
+        { 100, 163, 232 }, { 179, 161, 222 }, {  63, 142, 204 },
+        {  37, 113, 174 }, {  26,  89, 137 }, {  18,  68,  97 }
+      }, {  // Band 2
+        {  85, 181, 230 }, {  32, 146, 209 }, {   7, 100, 164 },
+        {   3,  71, 121 }, {   1,  45,  77 }, {   1,  18,  30 }
+      }, {  // Band 3
+        {  65, 187, 230 }, {  20, 148, 207 }, {   2,  97, 159 },
+        {   1,  68, 116 }, {   1,  40,  70 }, {   1,  14,  29 }
+      }, {  // Band 4
+        {  40, 194, 227 }, {   8, 147, 204 }, {   1,  94, 155 },
+        {   1,  65, 112 }, {   1,  39,  66 }, {   1,  14,  26 }
+      }, {  // Band 5
+        {  16, 208, 228 }, {   3, 151, 207 }, {   1,  98, 160 },
+        {   1,  67, 117 }, {   1,  41,  74 }, {   1,  17,  31 }
+      }
+    }
+  }
+};
+
+static const vp10_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = {
+  {  // Y plane
+    {  // Intra
+      {  // Band 0
+        {  17,  38, 140 }, {   7,  34,  80 }, {   1,  17,  29 }
+      }, {  // Band 1
+        {  37,  75, 128 }, {  41,  76, 128 }, {  26,  66, 116 },
+        {  12,  52,  94 }, {   2,  32,  55 }, {   1,  10,  16 }
+      }, {  // Band 2
+        {  50, 127, 154 }, {  37, 109, 152 }, {  16,  82, 121 },
+        {   5,  59,  85 }, {   1,  35,  54 }, {   1,  13,  20 }
+      }, {  // Band 3
+        {  40, 142, 167 }, {  17, 110, 157 }, {   2,  71, 112 },
+        {   1,  44,  72 }, {   1,  27,  45 }, {   1,  11,  17 }
+      }, {  // Band 4
+        {  30, 175, 188 }, {   9, 124, 169 }, {   1,  74, 116 },
+        {   1,  48,  78 }, {   1,  30,  49 }, {   1,  11,  18 }
+      }, {  // Band 5
+        {  10, 222, 223 }, {   2, 150, 194 }, {   1,  83, 128 },
+        {   1,  48,  79 }, {   1,  27,  45 }, {   1,  11,  17 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        {  36,  41, 235 }, {  29,  36, 193 }, {  10,  27, 111 }
+      }, {  // Band 1
+        {  85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 },
+        {  57, 113, 168 }, {  23,  83, 120 }, {  10,  49,  61 }
+      }, {  // Band 2
+        {  85, 190, 223 }, {  36, 139, 200 }, {   5,  90, 146 },
+        {   1,  60, 103 }, {   1,  38,  65 }, {   1,  18,  30 }
+      }, {  // Band 3
+        {  72, 202, 223 }, {  23, 141, 199 }, {   2,  86, 140 },
+        {   1,  56,  97 }, {   1,  36,  61 }, {   1,  16,  27 }
+      }, {  // Band 4
+        {  55, 218, 225 }, {  13, 145, 200 }, {   1,  86, 141 },
+        {   1,  57,  99 }, {   1,  35,  61 }, {   1,  13,  22 }
+      }, {  // Band 5
+        {  15, 235, 212 }, {   1, 132, 184 }, {   1,  84, 139 },
+        {   1,  57,  97 }, {   1,  34,  56 }, {   1,  14,  23 }
+      }
+    }
+  }, {  // UV plane
+    {  // Intra
+      {  // Band 0
+        { 181,  21, 201 }, {  61,  37, 123 }, {  10,  38,  71 }
+      }, {  // Band 1
+        {  47, 106, 172 }, {  95, 104, 173 }, {  42,  93, 159 },
+        {  18,  77, 131 }, {   4,  50,  81 }, {   1,  17,  23 }
+      }, {  // Band 2
+        {  62, 147, 199 }, {  44, 130, 189 }, {  28, 102, 154 },
+        {  18,  75, 115 }, {   2,  44,  65 }, {   1,  12,  19 }
+      }, {  // Band 3
+        {  55, 153, 210 }, {  24, 130, 194 }, {   3,  93, 146 },
+        {   1,  61,  97 }, {   1,  31,  50 }, {   1,  10,  16 }
+      }, {  // Band 4
+        {  49, 186, 223 }, {  17, 148, 204 }, {   1,  96, 142 },
+        {   1,  53,  83 }, {   1,  26,  44 }, {   1,  11,  17 }
+      }, {  // Band 5
+        {  13, 217, 212 }, {   2, 136, 180 }, {   1,  78, 124 },
+        {   1,  50,  83 }, {   1,  29,  49 }, {   1,  14,  23 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 197,  13, 247 }, {  82,  17, 222 }, {  25,  17, 162 }
+      }, {  // Band 1
+        { 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 },
+        { 104, 158, 220 }, {  66, 128, 186 }, {  55,  90, 137 }
+      }, {  // Band 2
+        { 111, 197, 242 }, {  46, 158, 219 }, {   9, 104, 171 },
+        {   2,  65, 125 }, {   1,  44,  80 }, {   1,  17,  91 }
+      }, {  // Band 3
+        { 104, 208, 245 }, {  39, 168, 224 }, {   3, 109, 162 },
+        {   1,  79, 124 }, {   1,  50, 102 }, {   1,  43, 102 }
+      }, {  // Band 4
+        {  84, 220, 246 }, {  31, 177, 231 }, {   2, 115, 180 },
+        {   1,  79, 134 }, {   1,  55,  77 }, {   1,  60,  79 }
+      }, {  // Band 5
+        {  43, 243, 240 }, {   8, 180, 217 }, {   1, 115, 166 },
+        {   1,  84, 121 }, {   1,  51,  67 }, {   1,  16,   6 }
+      }
+    }
+  }
+};
+#endif  // CONFIG_ENTROPY
+/* clang-format on */
+
+static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) {
+  assert(p != 0);
+  memcpy(probs, vp10_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob));
+}
+
+void vp10_model_to_full_probs(const vpx_prob *model, vpx_prob *full) {
+  if (full != model)
+    memcpy(full, model, sizeof(vpx_prob) * UNCONSTRAINED_NODES);
+  extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]);
+}
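+/*
+ * Usage sketch (values are hypothetical): the 3-entry model is expanded into
+ * the full 11-node token tree.  The first UNCONSTRAINED_NODES probabilities
+ * are copied as-is and the remaining MODEL_NODES (11 - 3 = 8) come from the
+ * Pareto table row selected by the pivot node:
+ *
+ *   vpx_prob model[UNCONSTRAINED_NODES] = { 120, 60, 192 };
+ *   vpx_prob full[ENTROPY_NODES];
+ *   vp10_model_to_full_probs(model, full);
+ *   // full[0..2] == { 120, 60, 192 }
+ *   // full[3..10] == vp10_pareto8_full[192 - 1]
+ */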
+
+#if CONFIG_ANS
+void vp10_build_token_cdfs(const vpx_prob *pdf_model, rans_dec_lut cdf) {
+  AnsP10 pdf_tab[ENTROPY_TOKENS - 1];
+  assert(pdf_model[2] != 0);
+  // TODO(aconverse): Investigate making the precision of the zero and EOB tree
+  // nodes 10 bits.
+  rans_merge_prob8_pdf(pdf_tab, pdf_model[1],
+                       vp10_pareto8_token_probs[pdf_model[2] - 1],
+                       ENTROPY_TOKENS - 2);
+  rans_build_cdf_from_pdf(pdf_tab, cdf);
+}
+
+void vp10_coef_pareto_cdfs(FRAME_CONTEXT *fc) {
+  TX_SIZE t;
+  int i, j, k, l;
+  for (t = TX_4X4; t <= TX_32X32; ++t)
+    for (i = 0; i < PLANE_TYPES; ++i)
+      for (j = 0; j < REF_TYPES; ++j)
+        for (k = 0; k < COEF_BANDS; ++k)
+          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
+            vp10_build_token_cdfs(fc->coef_probs[t][i][j][k][l],
+                                  fc->coef_cdfs[t][i][j][k][l]);
+}
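+/*
+ * Note: this walk covers the 4 transform sizes x PLANE_TYPES x REF_TYPES x
+ * (3 + 5 * 6) band/context pairs, i.e. 4 * 2 * 2 * 33 = 528 token CDFs
+ * rebuilt per frame context.
+ */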
+#endif  // CONFIG_ANS
+
+void vp10_default_coef_probs(VP10_COMMON *cm) {
+#if CONFIG_ENTROPY
+  const int index = VPXMIN(
+      ROUND_POWER_OF_TWO(cm->base_qindex, 8 - QCTX_BIN_BITS), QCTX_BINS - 1);
+  vp10_copy(cm->fc->coef_probs, default_qctx_coef_probs[index]);
+#else
+  vp10_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4);
+  vp10_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8);
+  vp10_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16);
+  vp10_copy(cm->fc->coef_probs[TX_32X32], default_coef_probs_32x32);
+#endif  // CONFIG_ENTROPY
+#if CONFIG_ANS
+  vp10_coef_pareto_cdfs(cm->fc);
+#endif  // CONFIG_ANS
+}
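+/*
+ * Worked example for the CONFIG_ENTROPY path above, assuming the usual
+ * ROUND_POWER_OF_TWO(x, n) == (x + (1 << (n - 1))) >> n: with
+ * QCTX_BIN_BITS == 2, index == VPXMIN((base_qindex + 32) >> 6, 3), so
+ * base_qindex 0..31 -> bin 0, 32..95 -> bin 1, 96..159 -> bin 2 and
+ * 160..255 -> bin 3, picking one of the four default probability sets.
+ */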
+
+static void adapt_coef_probs(VP10_COMMON *cm, TX_SIZE tx_size,
+                             unsigned int count_sat,
+                             unsigned int update_factor) {
+  const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
+  vp10_coeff_probs_model *const probs = cm->fc->coef_probs[tx_size];
+#if CONFIG_ENTROPY
+  const vp10_coeff_probs_model *const pre_probs =
+      cm->partial_prob_update
+          ? (const vp10_coeff_probs_model *)cm->starting_coef_probs[tx_size]
+          : pre_fc->coef_probs[tx_size];
+#else
+  const vp10_coeff_probs_model *const pre_probs = pre_fc->coef_probs[tx_size];
+#endif  // CONFIG_ENTROPY
+  const vp10_coeff_count_model *const counts =
+      (const vp10_coeff_count_model *)cm->counts.coef[tx_size];
+  const unsigned int(*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
+      (const unsigned int(*)[
+          REF_TYPES][COEF_BANDS][COEFF_CONTEXTS])cm->counts.eob_branch[tx_size];
+  int i, j, k, l, m;
+
+  for (i = 0; i < PLANE_TYPES; ++i)
+    for (j = 0; j < REF_TYPES; ++j)
+      for (k = 0; k < COEF_BANDS; ++k)
+        for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+          const int n0 = counts[i][j][k][l][ZERO_TOKEN];
+          const int n1 = counts[i][j][k][l][ONE_TOKEN];
+          const int n2 = counts[i][j][k][l][TWO_TOKEN];
+          const int neob = counts[i][j][k][l][EOB_MODEL_TOKEN];
+          const unsigned int branch_ct[UNCONSTRAINED_NODES][2] = {
+            { neob, eob_counts[i][j][k][l] - neob }, { n0, n1 + n2 }, { n1, n2 }
+          };
+          for (m = 0; m < UNCONSTRAINED_NODES; ++m)
+            probs[i][j][k][l][m] =
+                vp10_merge_probs(pre_probs[i][j][k][l][m], branch_ct[m],
+                                 count_sat, update_factor);
+        }
+}
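+/*
+ * Illustration (hypothetical counts): the three UNCONSTRAINED_NODES above
+ * are the binary decisions at the top of the token tree.  With neob = 10,
+ * eob_branch = 40, n0 = 12, n1 = 9 and n2 = 9, the branch counts become
+ *   node 0 (EOB branch):          { 10, 30 }
+ *   node 1 (zero vs non-zero):    { 12, 18 }
+ *   node 2 (one vs two-or-more):  {  9,  9 }
+ * and each pair is merged into the corresponding previous-frame probability.
+ */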
+
+void vp10_adapt_coef_probs(VP10_COMMON *cm) {
+  TX_SIZE t;
+  unsigned int count_sat, update_factor;
+
+#if CONFIG_ENTROPY
+  if (cm->last_frame_type == KEY_FRAME) {
+    update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY_BITS; /* adapt quickly */
+    count_sat = COEF_COUNT_SAT_AFTER_KEY_BITS;
+  } else {
+    update_factor = COEF_MAX_UPDATE_FACTOR_BITS;
+    count_sat = COEF_COUNT_SAT_BITS;
+  }
+  if (cm->partial_prob_update == 1) {
+    update_factor = COEF_MAX_UPDATE_FACTOR_BITS;
+  }
+#else
+  if (cm->last_frame_type == KEY_FRAME) {
+    update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */
+    count_sat = COEF_COUNT_SAT_AFTER_KEY;
+  } else {
+    update_factor = COEF_MAX_UPDATE_FACTOR;
+    count_sat = COEF_COUNT_SAT;
+  }
+#endif  // CONFIG_ENTROPY
+  for (t = TX_4X4; t <= TX_32X32; t++)
+    adapt_coef_probs(cm, t, count_sat, update_factor);
+#if CONFIG_ANS
+  vp10_coef_pareto_cdfs(cm->fc);
+#endif
+}
+
+#if CONFIG_ENTROPY
+void vp10_partial_adapt_probs(VP10_COMMON *cm, int mi_row, int mi_col) {
+  (void)mi_row;
+  (void)mi_col;
+
+  if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+    cm->partial_prob_update = 1;
+    vp10_adapt_coef_probs(cm);
+  }
+}
+#endif  // CONFIG_ENTROPY
diff --git a/av1/common/entropy.h b/av1/common/entropy.h
new file mode 100644
index 0000000..b0afd46
--- /dev/null
+++ b/av1/common/entropy.h
@@ -0,0 +1,315 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ENTROPY_H_
+#define VP10_COMMON_ENTROPY_H_
+
+#include "aom/vpx_integer.h"
+#include "aom_dsp/prob.h"
+
+#if CONFIG_ANS
+#include "av1/common/ans.h"
+#endif  // CONFIG_ANS
+#include "av1/common/common.h"
+#include "av1/common/enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DIFF_UPDATE_PROB 252
+#define GROUP_DIFF_UPDATE_PROB 252
+
+#if CONFIG_ENTROPY
+#define COEF_PROBS_BUFS 16
+#define QCTX_BIN_BITS 2
+#define QCTX_BINS (1 << QCTX_BIN_BITS)
+#endif  // CONFIG_ENTROPY
+
+// Coefficient token alphabet
+#define ZERO_TOKEN 0        // 0     Extra Bits 0+0
+#define ONE_TOKEN 1         // 1     Extra Bits 0+1
+#define TWO_TOKEN 2         // 2     Extra Bits 0+1
+#define THREE_TOKEN 3       // 3     Extra Bits 0+1
+#define FOUR_TOKEN 4        // 4     Extra Bits 0+1
+#define CATEGORY1_TOKEN 5   // 5-6   Extra Bits 1+1
+#define CATEGORY2_TOKEN 6   // 7-10  Extra Bits 2+1
+#define CATEGORY3_TOKEN 7   // 11-18 Extra Bits 3+1
+#define CATEGORY4_TOKEN 8   // 19-34 Extra Bits 4+1
+#define CATEGORY5_TOKEN 9   // 35-66 Extra Bits 5+1
+#define CATEGORY6_TOKEN 10  // 67+   Extra Bits 14+1
+#define EOB_TOKEN 11        // EOB   Extra Bits 0+0
+
+#define ENTROPY_TOKENS 12
+
+#define ENTROPY_NODES 11
+
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_pt_energy_class[ENTROPY_TOKENS]);
+
+#define CAT1_MIN_VAL 5
+#define CAT2_MIN_VAL 7
+#define CAT3_MIN_VAL 11
+#define CAT4_MIN_VAL 19
+#define CAT5_MIN_VAL 35
+#define CAT6_MIN_VAL 67
+
+// Extra bit probabilities.
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat1_prob[1]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat2_prob[2]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat3_prob[3]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat4_prob[4]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat5_prob[5]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat6_prob[14]);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat1_prob_high10[1]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat2_prob_high10[2]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat3_prob_high10[3]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat4_prob_high10[4]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat5_prob_high10[5]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat6_prob_high10[16]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat1_prob_high12[1]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat2_prob_high12[2]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat3_prob_high12[3]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat4_prob_high12[4]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat5_prob_high12[5]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat6_prob_high12[18]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#define EOB_MODEL_TOKEN 3
+
+typedef struct {
+  const vpx_tree_index *tree;
+  const vpx_prob *prob;
+  int len;
+  int base_val;
+  const int16_t *cost;
+} vp10_extra_bit;
+
+// indexed by token value
+extern const vp10_extra_bit vp10_extra_bits[ENTROPY_TOKENS];
+#if CONFIG_VP9_HIGHBITDEPTH
+extern const vp10_extra_bit vp10_extra_bits_high10[ENTROPY_TOKENS];
+extern const vp10_extra_bit vp10_extra_bits_high12[ENTROPY_TOKENS];
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#define DCT_MAX_VALUE 16384
+#if CONFIG_VP9_HIGHBITDEPTH
+#define DCT_MAX_VALUE_HIGH10 65536
+#define DCT_MAX_VALUE_HIGH12 262144
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+/* Coefficients are predicted via a 3-dimensional probability table. */
+
+#define REF_TYPES 2  // intra=0, inter=1
+
+/* Middle dimension reflects the coefficient position within the transform. */
+#define COEF_BANDS 6
+
+/* Inside dimension is a measure of nearby complexity, reflecting the energy
+   of the nearby coefficients that are nonzero.  For the first coefficient
+   (DC, unless block type is 0), we look at the (already encoded) blocks
+   above and to the left of the current block.  The context index is then the
+   number (0, 1, or 2) of these blocks having nonzero coefficients.
+   After decoding a coefficient, the measure is determined by the size of the
+   most recently decoded coefficient.
+   Note that the intuitive meaning of this measure changes as coefficients
+   are decoded: e.g., prior to the first token, a zero means that the
+   neighbors are empty, while after the first token, because of the use of
+   end-of-block, a zero means we just decoded a zero and hence guarantees
+   that a nonzero coefficient will appear later in this block.  This shift in
+   meaning is harmless, because our context also depends on the coefficient
+   band (and zigzag positions 0, 1, and 2 are in distinct bands). */
+
+#define COEFF_CONTEXTS 6
+#define BAND_COEFF_CONTEXTS(band) ((band) == 0 ? 3 : COEFF_CONTEXTS)
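+/* Band 0 covers only the first scan position, whose context is the count
+   (0, 1 or 2) of neighboring blocks with nonzero coefficients, so it needs
+   just 3 contexts; every other band uses all COEFF_CONTEXTS contexts. */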
+
+// #define ENTROPY_STATS
+
+typedef unsigned int
+    vp10_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS][ENTROPY_TOKENS];
+typedef unsigned int
+    vp10_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS][ENTROPY_NODES][2];
+
+#define SUBEXP_PARAM 4   /* Subexponential code parameter */
+#define MODULUS_PARAM 13 /* Modulus parameter */
+
+struct VP10Common;
+void vp10_default_coef_probs(struct VP10Common *cm);
+void vp10_adapt_coef_probs(struct VP10Common *cm);
+#if CONFIG_ENTROPY
+void vp10_partial_adapt_probs(struct VP10Common *cm, int mi_row, int mi_col);
+#endif  // CONFIG_ENTROPY
+
+// This is the index in the scan order beyond which all coefficients for
+// 8x8 transform and above are in the top band.
+// This macro is currently unused but may be used by certain implementations.
+#define MAXBAND_INDEX 21
+
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x8plus[1024]);
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x8_8x4[32]);
+#endif  // CONFIG_EXT_TX
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x4[16]);
+
+DECLARE_ALIGNED(16, extern const uint16_t, band_count_table[TX_SIZES_ALL][8]);
+DECLARE_ALIGNED(16, extern const uint16_t,
+                band_cum_count_table[TX_SIZES_ALL][8]);
+
+static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
+  switch (tx_size) {
+    case TX_4X4: return vp10_coefband_trans_4x4;
+#if CONFIG_EXT_TX
+    case TX_4X8: return vp10_coefband_trans_4x8_8x4;
+#endif  // CONFIG_EXT_TX
+    default: return vp10_coefband_trans_8x8plus;
+  }
+}
+
+// One list of probabilities is stored for each possible ONE node probability
+// 1, 2, 3, ..., 255; row p - 1 of vp10_pareto8_full holds the model-node
+// probabilities used when the pivot probability is p.
+
+#define COEFF_PROB_MODELS 255
+
+#define UNCONSTRAINED_NODES 3
+
+#define PIVOT_NODE 2  // which node is pivot
+
+#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
+extern const vpx_tree_index vp10_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)];
+extern const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
+#if CONFIG_ANS
+extern const AnsP10
+    vp10_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2];
+
+typedef rans_dec_lut coeff_cdf_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS];
+#endif  // CONFIG_ANS
+
+typedef vpx_prob vp10_coeff_probs_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
+                                       [UNCONSTRAINED_NODES];
+
+typedef unsigned int vp10_coeff_count_model
+    [REF_TYPES][COEF_BANDS][COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
+
+void vp10_model_to_full_probs(const vpx_prob *model, vpx_prob *full);
+
+typedef char ENTROPY_CONTEXT;
+
+static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a,
+                                           ENTROPY_CONTEXT b) {
+  return (a != 0) + (b != 0);
+}
+
+static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
+                                      const ENTROPY_CONTEXT *l) {
+  ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
+
+  switch (tx_size) {
+    case TX_4X4:
+      above_ec = a[0] != 0;
+      left_ec = l[0] != 0;
+      break;
+#if CONFIG_EXT_TX
+    case TX_4X8:
+      above_ec = a[0] != 0;
+      left_ec = !!*(const uint16_t *)l;
+      break;
+    case TX_8X4:
+      above_ec = !!*(const uint16_t *)a;
+      left_ec = l[0] != 0;
+      break;
+    case TX_8X16:
+      above_ec = !!*(const uint16_t *)a;
+      left_ec = !!*(const uint32_t *)l;
+      break;
+    case TX_16X8:
+      above_ec = !!*(const uint32_t *)a;
+      left_ec = !!*(const uint16_t *)l;
+      break;
+    case TX_16X32:
+      above_ec = !!*(const uint32_t *)a;
+      left_ec = !!*(const uint64_t *)l;
+      break;
+    case TX_32X16:
+      above_ec = !!*(const uint64_t *)a;
+      left_ec = !!*(const uint32_t *)l;
+      break;
+#endif  // CONFIG_EXT_TX
+    case TX_8X8:
+      above_ec = !!*(const uint16_t *)a;
+      left_ec = !!*(const uint16_t *)l;
+      break;
+    case TX_16X16:
+      above_ec = !!*(const uint32_t *)a;
+      left_ec = !!*(const uint32_t *)l;
+      break;
+    case TX_32X32:
+      above_ec = !!*(const uint64_t *)a;
+      left_ec = !!*(const uint64_t *)l;
+      break;
+    default: assert(0 && "Invalid transform size."); break;
+  }
+  return combine_entropy_contexts(above_ec, left_ec);
+}
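+/*
+ * Note on the wide loads above: the ENTROPY_CONTEXT arrays keep one byte per
+ * 4-sample unit along the block edge, so "is any context nonzero?" for a
+ * transform spanning N units is a single N-byte load, e.g. for TX_16X16
+ *   !!*(const uint32_t *)a  ==  ((a[0] | a[1] | a[2] | a[3]) != 0)
+ */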
+
+#if CONFIG_ANS
+struct frame_contexts;
+void vp10_coef_pareto_cdfs(struct frame_contexts *fc);
+#endif  // CONFIG_ANS
+
+#if CONFIG_ENTROPY
+#define COEF_COUNT_SAT_BITS 5
+#define COEF_MAX_UPDATE_FACTOR_BITS 7
+#define COEF_COUNT_SAT_AFTER_KEY_BITS 5
+#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY_BITS 7
+#define MODE_MV_COUNT_SAT_BITS 5
+#define MODE_MV_MAX_UPDATE_FACTOR_BITS 7
+
+#else
+
+#define COEF_COUNT_SAT 24
+#define COEF_MAX_UPDATE_FACTOR 112
+#define COEF_COUNT_SAT_AFTER_KEY 24
+#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128
+
+#endif  // CONFIG_ENTROPY
+
+static INLINE vpx_prob vp10_merge_probs(vpx_prob pre_prob,
+                                        const unsigned int ct[2],
+                                        unsigned int count_sat,
+                                        unsigned int max_update_factor) {
+#if CONFIG_ENTROPY
+  const vpx_prob prob = get_binary_prob(ct[0], ct[1]);
+  const unsigned int count =
+      VPXMIN(ct[0] + ct[1], (unsigned int)(1 << count_sat));
+  const unsigned int factor = count << (max_update_factor - count_sat);
+  return weighted_prob(pre_prob, prob, factor);
+#else
+  return merge_probs(pre_prob, ct, count_sat, max_update_factor);
+#endif  // CONFIG_ENTROPY
+}
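+/*
+ * Worked example for the CONFIG_ENTROPY branch above, assuming the usual
+ * weighted_prob(pre, p, f) == ROUND_POWER_OF_TWO(pre * (256 - f) + p * f, 8):
+ * with COEF_COUNT_SAT_BITS == 5 and COEF_MAX_UPDATE_FACTOR_BITS == 7, a
+ * context observed ct[0] + ct[1] = 40 times saturates at count = 32 and
+ * yields factor = 32 << (7 - 5) = 128, i.e. the previous probability and the
+ * new estimate are blended equally.
+ */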
+
+static INLINE vpx_prob vp10_mode_mv_merge_probs(vpx_prob pre_prob,
+                                                const unsigned int ct[2]) {
+#if CONFIG_ENTROPY
+  return vp10_merge_probs(pre_prob, ct, MODE_MV_COUNT_SAT_BITS,
+                          MODE_MV_MAX_UPDATE_FACTOR_BITS);
+#else
+  return mode_mv_merge_probs(pre_prob, ct);
+#endif  // CONFIG_ENTROPY
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_ENTROPY_H_
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
new file mode 100644
index 0000000..98e26e7
--- /dev/null
+++ b/av1/common/entropymode.c
@@ -0,0 +1,1631 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "aom_mem/vpx_mem.h"
+
+#include "av1/common/reconinter.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/seg_common.h"
+
+const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] =
+    { {
+          // above = dc
+          { 137, 30, 42, 148, 151, 207, 70, 52, 91 },   // left = dc
+          { 92, 45, 102, 136, 116, 180, 74, 90, 100 },  // left = v
+          { 73, 32, 19, 187, 222, 215, 46, 34, 100 },   // left = h
+          { 91, 30, 32, 116, 121, 186, 93, 86, 94 },    // left = d45
+          { 72, 35, 36, 149, 68, 206, 68, 63, 105 },    // left = d135
+          { 73, 31, 28, 138, 57, 124, 55, 122, 151 },   // left = d117
+          { 67, 23, 21, 140, 126, 197, 40, 37, 171 },   // left = d153
+          { 86, 27, 28, 128, 154, 212, 45, 43, 53 },    // left = d207
+          { 74, 32, 27, 107, 86, 160, 63, 134, 102 },   // left = d63
+          { 59, 67, 44, 140, 161, 202, 78, 67, 119 }    // left = tm
+      },
+      {
+          // above = v
+          { 63, 36, 126, 146, 123, 158, 60, 90, 96 },   // left = dc
+          { 43, 46, 168, 134, 107, 128, 69, 142, 92 },  // left = v
+          { 44, 29, 68, 159, 201, 177, 50, 57, 77 },    // left = h
+          { 58, 38, 76, 114, 97, 172, 78, 133, 92 },    // left = d45
+          { 46, 41, 76, 140, 63, 184, 69, 112, 57 },    // left = d135
+          { 38, 32, 85, 140, 46, 112, 54, 151, 133 },   // left = d117
+          { 39, 27, 61, 131, 110, 175, 44, 75, 136 },   // left = d153
+          { 52, 30, 74, 113, 130, 175, 51, 64, 58 },    // left = d207
+          { 47, 35, 80, 100, 74, 143, 64, 163, 74 },    // left = d63
+          { 36, 61, 116, 114, 128, 162, 80, 125, 82 }   // left = tm
+      },
+      {
+          // above = h
+          { 82, 26, 26, 171, 208, 204, 44, 32, 105 },  // left = dc
+          { 55, 44, 68, 166, 179, 192, 57, 57, 108 },  // left = v
+          { 42, 26, 11, 199, 241, 228, 23, 15, 85 },   // left = h
+          { 68, 42, 19, 131, 160, 199, 55, 52, 83 },   // left = d45
+          { 58, 50, 25, 139, 115, 232, 39, 52, 118 },  // left = d135
+          { 50, 35, 33, 153, 104, 162, 64, 59, 131 },  // left = d117
+          { 44, 24, 16, 150, 177, 202, 33, 19, 156 },  // left = d153
+          { 55, 27, 12, 153, 203, 218, 26, 27, 49 },   // left = d207
+          { 53, 49, 21, 110, 116, 168, 59, 80, 76 },   // left = d63
+          { 38, 72, 19, 168, 203, 212, 50, 50, 107 }   // left = tm
+      },
+      {
+          // above = d45
+          { 103, 26, 36, 129, 132, 201, 83, 80, 93 },  // left = dc
+          { 59, 38, 83, 112, 103, 162, 98, 136, 90 },  // left = v
+          { 62, 30, 23, 158, 200, 207, 59, 57, 50 },   // left = h
+          { 67, 30, 29, 84, 86, 191, 102, 91, 59 },    // left = d45
+          { 60, 32, 33, 112, 71, 220, 64, 89, 104 },   // left = d135
+          { 53, 26, 34, 130, 56, 149, 84, 120, 103 },  // left = d117
+          { 53, 21, 23, 133, 109, 210, 56, 77, 172 },  // left = d153
+          { 77, 19, 29, 112, 142, 228, 55, 66, 36 },   // left = d207
+          { 61, 29, 29, 93, 97, 165, 83, 175, 162 },   // left = d63
+          { 47, 47, 43, 114, 137, 181, 100, 99, 95 }   // left = tm
+      },
+      {
+          // above = d135
+          { 69, 23, 29, 128, 83, 199, 46, 44, 101 },   // left = dc
+          { 53, 40, 55, 139, 69, 183, 61, 80, 110 },   // left = v
+          { 40, 29, 19, 161, 180, 207, 43, 24, 91 },   // left = h
+          { 60, 34, 19, 105, 61, 198, 53, 64, 89 },    // left = d45
+          { 52, 31, 22, 158, 40, 209, 58, 62, 89 },    // left = d135
+          { 44, 31, 29, 147, 46, 158, 56, 102, 198 },  // left = d117
+          { 35, 19, 12, 135, 87, 209, 41, 45, 167 },   // left = d153
+          { 55, 25, 21, 118, 95, 215, 38, 39, 66 },    // left = d207
+          { 51, 38, 25, 113, 58, 164, 70, 93, 97 },    // left = d63
+          { 47, 54, 34, 146, 108, 203, 72, 103, 151 }  // left = tm
+      },
+      {
+          // above = d117
+          { 64, 19, 37, 156, 66, 138, 49, 95, 133 },   // left = dc
+          { 46, 27, 80, 150, 55, 124, 55, 121, 135 },  // left = v
+          { 36, 23, 27, 165, 149, 166, 54, 64, 118 },  // left = h
+          { 53, 21, 36, 131, 63, 163, 60, 109, 81 },   // left = d45
+          { 40, 26, 35, 154, 40, 185, 51, 97, 123 },   // left = d135
+          { 35, 19, 34, 179, 19, 97, 48, 129, 124 },   // left = d117
+          { 36, 20, 26, 136, 62, 164, 33, 77, 154 },   // left = d153
+          { 45, 18, 32, 130, 90, 157, 40, 79, 91 },    // left = d207
+          { 45, 26, 28, 129, 45, 129, 49, 147, 123 },  // left = d63
+          { 38, 44, 51, 136, 74, 162, 57, 97, 121 }    // left = tm
+      },
+      {
+          // above = d153
+          { 75, 17, 22, 136, 138, 185, 32, 34, 166 },  // left = dc
+          { 56, 39, 58, 133, 117, 173, 48, 53, 187 },  // left = v
+          { 35, 21, 12, 161, 212, 207, 20, 23, 145 },  // left = h
+          { 56, 29, 19, 117, 109, 181, 55, 68, 112 },  // left = d45
+          { 47, 29, 17, 153, 64, 220, 59, 51, 114 },   // left = d135
+          { 46, 16, 24, 136, 76, 147, 41, 64, 172 },   // left = d117
+          { 34, 17, 11, 108, 152, 187, 13, 15, 209 },  // left = d153
+          { 51, 24, 14, 115, 133, 209, 32, 26, 104 },  // left = d207
+          { 55, 30, 18, 122, 79, 179, 44, 88, 116 },   // left = d63
+          { 37, 49, 25, 129, 168, 164, 41, 54, 148 }   // left = tm
+      },
+      {
+          // above = d207
+          { 82, 22, 32, 127, 143, 213, 39, 41, 70 },   // left = dc
+          { 62, 44, 61, 123, 105, 189, 48, 57, 64 },   // left = v
+          { 47, 25, 17, 175, 222, 220, 24, 30, 86 },   // left = h
+          { 68, 36, 17, 106, 102, 206, 59, 74, 74 },   // left = d45
+          { 57, 39, 23, 151, 68, 216, 55, 63, 58 },    // left = d135
+          { 49, 30, 35, 141, 70, 168, 82, 40, 115 },   // left = d117
+          { 51, 25, 15, 136, 129, 202, 38, 35, 139 },  // left = d153
+          { 68, 26, 16, 111, 141, 215, 29, 28, 28 },   // left = d207
+          { 59, 39, 19, 114, 75, 180, 77, 104, 42 },   // left = d63
+          { 40, 61, 26, 126, 152, 206, 61, 59, 93 }    // left = tm
+      },
+      {
+          // above = d63
+          { 78, 23, 39, 111, 117, 170, 74, 124, 94 },   // left = dc
+          { 48, 34, 86, 101, 92, 146, 78, 179, 134 },   // left = v
+          { 47, 22, 24, 138, 187, 178, 68, 69, 59 },    // left = h
+          { 56, 25, 33, 105, 112, 187, 95, 177, 129 },  // left = d45
+          { 48, 31, 27, 114, 63, 183, 82, 116, 56 },    // left = d135
+          { 43, 28, 37, 121, 63, 123, 61, 192, 169 },   // left = d117
+          { 42, 17, 24, 109, 97, 177, 56, 76, 122 },    // left = d153
+          { 58, 18, 28, 105, 139, 182, 70, 92, 63 },    // left = d207
+          { 46, 23, 32, 74, 86, 150, 67, 183, 88 },     // left = d63
+          { 36, 38, 48, 92, 122, 165, 88, 137, 91 }     // left = tm
+      },
+      {
+          // above = tm
+          { 65, 70, 60, 155, 159, 199, 61, 60, 81 },    // left = dc
+          { 44, 78, 115, 132, 119, 173, 71, 112, 93 },  // left = v
+          { 39, 38, 21, 184, 227, 206, 42, 32, 64 },    // left = h
+          { 58, 47, 36, 124, 137, 193, 80, 82, 78 },    // left = d45
+          { 49, 50, 35, 144, 95, 205, 63, 78, 59 },     // left = d135
+          { 41, 53, 52, 148, 71, 142, 65, 128, 51 },    // left = d117
+          { 40, 36, 28, 143, 143, 202, 40, 55, 137 },   // left = d153
+          { 52, 34, 29, 129, 183, 227, 42, 35, 43 },    // left = d207
+          { 42, 44, 44, 104, 105, 164, 64, 130, 80 },   // left = d63
+          { 43, 81, 53, 140, 169, 204, 68, 84, 72 }     // left = tm
+      } };
+
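+// Default Y mode probabilities for non-key frames, indexed by block size
+// group, and default UV mode probabilities, conditioned on the collocated Y
+// mode. Both index vp10_intra_mode_tree.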
+static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = {
+  { 65, 32, 18, 144, 162, 194, 41, 51, 98 },   // block_size < 8x8
+  { 132, 68, 18, 165, 217, 196, 45, 40, 78 },  // block_size < 16x16
+  { 173, 80, 19, 176, 240, 193, 64, 35, 46 },  // block_size < 32x32
+  { 221, 135, 38, 194, 248, 121, 96, 85, 29 }  // block_size >= 32x32
+};
+
+static const vpx_prob default_uv_probs[INTRA_MODES][INTRA_MODES - 1] = {
+  { 120, 7, 76, 176, 208, 126, 28, 54, 103 },   // y = dc
+  { 48, 12, 154, 155, 139, 90, 34, 117, 119 },  // y = v
+  { 67, 6, 25, 204, 243, 158, 13, 21, 96 },     // y = h
+  { 97, 5, 44, 131, 176, 139, 48, 68, 97 },     // y = d45
+  { 83, 5, 42, 156, 111, 152, 26, 49, 152 },    // y = d135
+  { 80, 5, 58, 178, 74, 83, 33, 62, 145 },      // y = d117
+  { 86, 5, 32, 154, 192, 168, 14, 22, 163 },    // y = d153
+  { 85, 5, 32, 156, 216, 148, 19, 29, 73 },     // y = d207
+  { 77, 7, 64, 116, 132, 122, 37, 126, 120 },   // y = d63
+  { 101, 21, 107, 181, 192, 103, 19, 67, 125 }  // y = tm
+};
+
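+// Default partition probabilities, grouped by block size, with four
+// contexts per size based on whether the above (a) and left (l) neighbours
+// are split. Rows index vp10_partition_tree (vp10_ext_partition_tree when
+// CONFIG_EXT_PARTITION_TYPES is on).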
+#if CONFIG_EXT_PARTITION_TYPES
+static const vpx_prob
+    default_partition_probs[PARTITION_CONTEXTS][EXT_PARTITION_TYPES - 1] = {
+      // 8x8 -> 4x4
+      { 199, 122, 141, 128, 128, 128, 128 },  // a/l both not split
+      { 147, 63, 159, 128, 128, 128, 128 },   // a split, l not split
+      { 148, 133, 118, 128, 128, 128, 128 },  // l split, a not split
+      { 121, 104, 114, 128, 128, 128, 128 },  // a/l both split
+      // 16x16 -> 8x8
+      { 174, 73, 87, 128, 128, 128, 128 },  // a/l both not split
+      { 92, 41, 83, 128, 128, 128, 128 },   // a split, l not split
+      { 82, 99, 50, 128, 128, 128, 128 },   // l split, a not split
+      { 53, 39, 39, 128, 128, 128, 128 },   // a/l both split
+      // 32x32 -> 16x16
+      { 177, 58, 59, 128, 128, 128, 128 },  // a/l both not split
+      { 68, 26, 63, 128, 128, 128, 128 },   // a split, l not split
+      { 52, 79, 25, 128, 128, 128, 128 },   // l split, a not split
+      { 17, 14, 12, 128, 128, 128, 128 },   // a/l both split
+      // 64x64 -> 32x32
+      { 222, 34, 30, 128, 128, 128, 128 },  // a/l both not split
+      { 72, 16, 44, 128, 128, 128, 128 },   // a split, l not split
+      { 58, 32, 12, 128, 128, 128, 128 },   // l split, a not split
+      { 10, 7, 6, 128, 128, 128, 128 },     // a/l both split
+#if CONFIG_EXT_PARTITION
+      // 128x128 -> 64x64
+      { 222, 34, 30, 128, 128, 128, 128 },  // a/l both not split
+      { 72, 16, 44, 128, 128, 128, 128 },   // a split, l not split
+      { 58, 32, 12, 128, 128, 128, 128 },   // l split, a not split
+      { 10, 7, 6, 128, 128, 128, 128 },     // a/l both split
+#endif                                      // CONFIG_EXT_PARTITION
+    };
+#else
+static const vpx_prob
+    default_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] = {
+      // 8x8 -> 4x4
+      { 199, 122, 141 },  // a/l both not split
+      { 147, 63, 159 },   // a split, l not split
+      { 148, 133, 118 },  // l split, a not split
+      { 121, 104, 114 },  // a/l both split
+      // 16x16 -> 8x8
+      { 174, 73, 87 },  // a/l both not split
+      { 92, 41, 83 },   // a split, l not split
+      { 82, 99, 50 },   // l split, a not split
+      { 53, 39, 39 },   // a/l both split
+      // 32x32 -> 16x16
+      { 177, 58, 59 },  // a/l both not split
+      { 68, 26, 63 },   // a split, l not split
+      { 52, 79, 25 },   // l split, a not split
+      { 17, 14, 12 },   // a/l both split
+      // 64x64 -> 32x32
+      { 222, 34, 30 },  // a/l both not split
+      { 72, 16, 44 },   // a split, l not split
+      { 58, 32, 12 },   // l split, a not split
+      { 10, 7, 6 },     // a/l both split
+#if CONFIG_EXT_PARTITION
+      // 128x128 -> 64x64
+      { 222, 34, 30 },  // a/l both not split
+      { 72, 16, 44 },   // a split, l not split
+      { 58, 32, 12 },   // l split, a not split
+      { 10, 7, 6 },     // a/l both split
+#endif  // CONFIG_EXT_PARTITION
+    };
+#endif  // CONFIG_EXT_PARTITION_TYPES
+
+#if CONFIG_REF_MV
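+// With CONFIG_REF_MV, the inter mode is coded as a cascade of binary
+// decisions (NEWMV or not, ZEROMV or not, which reference MV, DRL index),
+// each with its own set of contexts below.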
+static const vpx_prob default_newmv_prob[NEWMV_MODE_CONTEXTS] = {
+  200, 180, 150, 150, 110, 70, 60,
+};
+
+static const vpx_prob default_zeromv_prob[ZEROMV_MODE_CONTEXTS] = {
+  192, 64,
+};
+
+static const vpx_prob default_refmv_prob[REFMV_MODE_CONTEXTS] = {
+  220, 220, 200, 200, 180, 128, 30, 220, 30,
+};
+
+static const vpx_prob default_drl_prob[DRL_MODE_CONTEXTS] = { 128, 160, 180,
+                                                              128, 160 };
+
+#if CONFIG_EXT_INTER
+static const vpx_prob default_new2mv_prob = 180;
+#endif  // CONFIG_EXT_INTER
+#endif  // CONFIG_REF_MV
+
+static const vpx_prob
+    default_inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1] = {
+#if CONFIG_EXT_INTER
+      // TODO(zoeliu): Adjust the initial default probs.
+      { 2, 173, 34, 173 },  // 0 = both zero mv
+      { 7, 145, 85, 145 },  // 1 = one zero mv + one a predicted mv
+      { 7, 166, 63, 166 },  // 2 = two predicted mvs
+      { 7, 94, 66, 128 },   // 3 = one predicted/zero and one new mv
+      { 8, 64, 46, 128 },   // 4 = two new mvs
+      { 17, 81, 31, 128 },  // 5 = one intra neighbour + x
+      { 25, 29, 30, 96 },   // 6 = two intra neighbours
+#else
+      { 2, 173, 34 },  // 0 = both zero mv
+      { 7, 145, 85 },  // 1 = one zero mv + one a predicted mv
+      { 7, 166, 63 },  // 2 = two predicted mvs
+      { 7, 94, 66 },   // 3 = one predicted/zero and one new mv
+      { 8, 64, 46 },   // 4 = two new mvs
+      { 17, 81, 31 },  // 5 = one intra neighbour + x
+      { 25, 29, 30 },  // 6 = two intra neighbours
+#endif  // CONFIG_EXT_INTER
+    };
+
+#if CONFIG_EXT_INTER
+static const vpx_prob default_inter_compound_mode_probs
+    [INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES - 1] = {
+      { 2, 173, 68, 192, 64, 192, 128, 180, 180 },   // 0 = both zero mv
+      { 7, 145, 160, 192, 64, 192, 128, 180, 180 },  // 1 = 1 zero + 1 predicted
+      { 7, 166, 126, 192, 64, 192, 128, 180, 180 },  // 2 = two predicted mvs
+      { 7, 94, 132, 192, 64, 192, 128, 180, 180 },   // 3 = 1 pred/zero, 1 new
+      { 8, 64, 64, 192, 64, 192, 128, 180, 180 },    // 4 = two new mvs
+      { 17, 81, 52, 192, 64, 192, 128, 180, 180 },   // 5 = one intra neighbour
+      { 25, 29, 50, 192, 64, 192, 128, 180, 180 },   // 6 = two intra neighbours
+    };
+
+static const vpx_prob default_interintra_prob[BLOCK_SIZE_GROUPS] = {
+  208, 208, 208, 208,
+};
+
+static const vpx_prob
+    default_interintra_mode_prob[BLOCK_SIZE_GROUPS][INTERINTRA_MODES - 1] = {
+      { 65, 32, 18, 144, 162, 194, 41, 51, 98 },   // block_size < 8x8
+      { 132, 68, 18, 165, 217, 196, 45, 40, 78 },  // block_size < 16x16
+      { 173, 80, 19, 176, 240, 193, 64, 35, 46 },  // block_size < 32x32
+      { 221, 135, 38, 194, 248, 121, 96, 85, 29 }  // block_size >= 32x32
+    };
+
+static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = {
+  208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240,
+#if CONFIG_EXT_PARTITION
+  208, 208, 208
+#endif  // CONFIG_EXT_PARTITION
+};
+
+static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = {
+  208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240,
+#if CONFIG_EXT_PARTITION
+  255, 255, 255
+#endif  // CONFIG_EXT_PARTITION
+};
+#endif  // CONFIG_EXT_INTER
+
+// Change this section appropriately once warped motion is supported
+#if CONFIG_OBMC && !CONFIG_WARPED_MOTION
+const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)] = {
+  -SIMPLE_TRANSLATION, -OBMC_CAUSAL
+};
+static const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] =
+    {
+      { 255 }, { 255 }, { 255 }, { 151 }, { 153 }, { 144 }, { 178 },
+      { 165 }, { 160 }, { 207 }, { 195 }, { 168 }, { 244 },
+#if CONFIG_EXT_PARTITION
+      { 252 }, { 252 }, { 252 },
+#endif  // CONFIG_EXT_PARTITION
+    };
+
+#elif !CONFIG_OBMC && CONFIG_WARPED_MOTION
+
+const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)] = {
+  -SIMPLE_TRANSLATION, -WARPED_CAUSAL
+};
+static const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] =
+    {
+      { 255 }, { 255 }, { 255 }, { 151 }, { 153 }, { 144 }, { 178 },
+      { 165 }, { 160 }, { 207 }, { 195 }, { 168 }, { 244 },
+#if CONFIG_EXT_PARTITION
+      { 252 }, { 252 }, { 252 },
+#endif  // CONFIG_EXT_PARTITION
+    };
+
+#elif CONFIG_OBMC && CONFIG_WARPED_MOTION
+
+const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)] = {
+  -SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, -WARPED_CAUSAL,
+};
+static const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] =
+    {
+      { 255, 200 }, { 255, 200 }, { 255, 200 }, { 151, 200 }, { 153, 200 },
+      { 144, 200 }, { 178, 200 }, { 165, 200 }, { 160, 200 }, { 207, 200 },
+      { 195, 200 }, { 168, 200 }, { 244, 200 },
+#if CONFIG_EXT_PARTITION
+      { 252, 200 }, { 252, 200 }, { 252, 200 },
+#endif  // CONFIG_EXT_PARTITION
+    };
+#endif  // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
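+// A vpx_tree_index array encodes a binary tree with two entries per node: a
+// negative entry is a leaf storing the negated symbol value, a non-negative
+// entry is the offset of that child's own entry pair within the array.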
+/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
+const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
+  -DC_PRED,   2,          /* 0 = DC_NODE */
+  -TM_PRED,   4,          /* 1 = TM_NODE */
+  -V_PRED,    6,          /* 2 = V_NODE */
+  8,          12,         /* 3 = COM_NODE */
+  -H_PRED,    10,         /* 4 = H_NODE */
+  -D135_PRED, -D117_PRED, /* 5 = D135_NODE */
+  -D45_PRED,  14,         /* 6 = D45_NODE */
+  -D63_PRED,  16,         /* 7 = D63_NODE */
+  -D153_PRED, -D207_PRED  /* 8 = D153_NODE */
+};
+
+const vpx_tree_index vp10_inter_mode_tree[TREE_SIZE(INTER_MODES)] = {
+  -INTER_OFFSET(ZEROMV),    2,
+  -INTER_OFFSET(NEARESTMV), 4,
+#if CONFIG_EXT_INTER
+  -INTER_OFFSET(NEARMV),    6,
+  -INTER_OFFSET(NEWMV),     -INTER_OFFSET(NEWFROMNEARMV)
+#else
+  -INTER_OFFSET(NEARMV),    -INTER_OFFSET(NEWMV)
+#endif  // CONFIG_EXT_INTER
+};
+
+#if CONFIG_EXT_INTER
+/* clang-format off */
+const vpx_tree_index vp10_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)] = {
+  -II_DC_PRED, 2,                   /* 0 = II_DC_NODE     */
+  -II_TM_PRED, 4,                   /* 1 = II_TM_NODE     */
+  -II_V_PRED, 6,                    /* 2 = II_V_NODE      */
+  8, 12,                            /* 3 = II_COM_NODE    */
+  -II_H_PRED, 10,                   /* 4 = II_H_NODE      */
+  -II_D135_PRED, -II_D117_PRED,     /* 5 = II_D135_NODE   */
+  -II_D45_PRED, 14,                 /* 6 = II_D45_NODE    */
+  -II_D63_PRED, 16,                 /* 7 = II_D63_NODE    */
+  -II_D153_PRED, -II_D207_PRED      /* 8 = II_D153_NODE   */
+};
+
+const vpx_tree_index vp10_inter_compound_mode_tree
+    [TREE_SIZE(INTER_COMPOUND_MODES)] = {
+  -INTER_COMPOUND_OFFSET(ZERO_ZEROMV), 2,
+  -INTER_COMPOUND_OFFSET(NEAREST_NEARESTMV), 4,
+  6, -INTER_COMPOUND_OFFSET(NEW_NEWMV),
+  8, 12,
+  -INTER_COMPOUND_OFFSET(NEAR_NEARMV), 10,
+  -INTER_COMPOUND_OFFSET(NEAREST_NEARMV),
+      -INTER_COMPOUND_OFFSET(NEAR_NEARESTMV),
+  14, 16,
+  -INTER_COMPOUND_OFFSET(NEAREST_NEWMV), -INTER_COMPOUND_OFFSET(NEW_NEARESTMV),
+  -INTER_COMPOUND_OFFSET(NEAR_NEWMV), -INTER_COMPOUND_OFFSET(NEW_NEARMV)
+};
+/* clang-format on */
+#endif  // CONFIG_EXT_INTER
+
+const vpx_tree_index vp10_partition_tree[TREE_SIZE(PARTITION_TYPES)] = {
+  -PARTITION_NONE, 2, -PARTITION_HORZ, 4, -PARTITION_VERT, -PARTITION_SPLIT
+};
+
+#if CONFIG_EXT_PARTITION_TYPES
+/* clang-format off */
+const vpx_tree_index vp10_ext_partition_tree[TREE_SIZE(EXT_PARTITION_TYPES)] = {
+  -PARTITION_NONE, 2,
+  6, 4,
+  8, -PARTITION_SPLIT,
+  -PARTITION_HORZ, 10,
+  -PARTITION_VERT, 12,
+  -PARTITION_HORZ_A, -PARTITION_HORZ_B,
+  -PARTITION_VERT_A, -PARTITION_VERT_B
+};
+/* clang-format on */
+#endif  // CONFIG_EXT_PARTITION_TYPES
+
+static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = {
+  9, 102, 187, 225
+};
+
+static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = {
+  239, 183, 119, 96, 41
+};
+
+#if CONFIG_EXT_REFS
+static const vpx_prob default_comp_ref_p[REF_CONTEXTS][FWD_REFS - 1] = {
+  // TODO(zoeliu): Adjust the initial prob values.
+  { 33, 16, 16 },
+  { 77, 74, 74 },
+  { 142, 142, 142 },
+  { 172, 170, 170 },
+  { 238, 247, 247 }
+};
+static const vpx_prob default_comp_bwdref_p[REF_CONTEXTS][BWD_REFS - 1] = {
+  { 16 }, { 74 }, { 142 }, { 170 }, { 247 }
+};
+#else
+static const vpx_prob default_comp_ref_p[REF_CONTEXTS][COMP_REFS - 1] = {
+  { 50 }, { 126 }, { 123 }, { 221 }, { 226 }
+};
+#endif  // CONFIG_EXT_REFS
+
+static const vpx_prob default_single_ref_p[REF_CONTEXTS][SINGLE_REFS - 1] = {
+#if CONFIG_EXT_REFS
+  { 33, 16, 16, 16, 16 },
+  { 77, 74, 74, 74, 74 },
+  { 142, 142, 142, 142, 142 },
+  { 172, 170, 170, 170, 170 },
+  { 238, 247, 247, 247, 247 }
+#else
+  { 33, 16 }, { 77, 74 }, { 142, 142 }, { 172, 170 }, { 238, 247 }
+#endif  // CONFIG_EXT_REFS
+};
+
+const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)] = {
+  -TWO_COLORS,  2, -THREE_COLORS, 4,  -FOUR_COLORS,  6,
+  -FIVE_COLORS, 8, -SIX_COLORS,   10, -SEVEN_COLORS, -EIGHT_COLORS,
+};
+
+// TODO(huisu): tune these probs
+const vpx_prob
+    vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = {
+      { 96, 89, 100, 64, 77, 130 },   { 22, 15, 44, 16, 34, 82 },
+      { 30, 19, 57, 18, 38, 86 },     { 94, 36, 104, 23, 43, 92 },
+      { 116, 76, 107, 46, 65, 105 },  { 112, 82, 94, 40, 70, 112 },
+      { 147, 124, 123, 58, 69, 103 }, { 180, 113, 136, 49, 45, 114 },
+      { 107, 70, 87, 49, 154, 156 },  { 98, 105, 142, 63, 64, 152 },
+#if CONFIG_EXT_PARTITION
+      { 98, 105, 142, 63, 64, 152 },  { 98, 105, 142, 63, 64, 152 },
+      { 98, 105, 142, 63, 64, 152 },
+#endif  // CONFIG_EXT_PARTITION
+    };
+
+const vpx_prob
+    vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] =
+        {
+          { 160, 196, 228, 213, 175, 230 }, { 87, 148, 208, 141, 166, 163 },
+          { 72, 151, 204, 139, 155, 161 },  { 78, 135, 171, 104, 120, 173 },
+          { 59, 92, 131, 78, 92, 142 },     { 75, 118, 149, 84, 90, 128 },
+          { 89, 87, 92, 66, 66, 128 },      { 67, 53, 54, 55, 66, 93 },
+          { 120, 130, 83, 171, 75, 214 },   { 72, 55, 66, 68, 79, 107 },
+#if CONFIG_EXT_PARTITION
+          { 72, 55, 66, 68, 79, 107 },      { 72, 55, 66, 68, 79, 107 },
+          { 72, 55, 66, 68, 79, 107 },
+#endif  // CONFIG_EXT_PARTITION
+        };
+
+const vpx_prob vp10_default_palette_y_mode_prob
+    [PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS] = {
+      { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
+      { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
+      { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
+      { 240, 180, 100 },
+#if CONFIG_EXT_PARTITION
+      { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
+#endif  // CONFIG_EXT_PARTITION
+    };
+
+const vpx_prob vp10_default_palette_uv_mode_prob[2] = { 253, 229 };
+
+const vpx_tree_index
+    vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)] = {
+      { // 2 colors
+        -PALETTE_COLOR_ONE, -PALETTE_COLOR_TWO },
+      { // 3 colors
+        -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, -PALETTE_COLOR_THREE },
+      { // 4 colors
+        -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE,
+        -PALETTE_COLOR_FOUR },
+      { // 5 colors
+        -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6,
+        -PALETTE_COLOR_FOUR, -PALETTE_COLOR_FIVE },
+      { // 6 colors
+        -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6,
+        -PALETTE_COLOR_FOUR, 8, -PALETTE_COLOR_FIVE, -PALETTE_COLOR_SIX },
+      { // 7 colors
+        -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6,
+        -PALETTE_COLOR_FOUR, 8, -PALETTE_COLOR_FIVE, 10, -PALETTE_COLOR_SIX,
+        -PALETTE_COLOR_SEVEN },
+      { // 8 colors
+        -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6,
+        -PALETTE_COLOR_FOUR, 8, -PALETTE_COLOR_FIVE, 10, -PALETTE_COLOR_SIX, 12,
+        -PALETTE_COLOR_SEVEN, -PALETTE_COLOR_EIGHT },
+    };
+
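+// For each palette size (2..8 colors) and each color context, the node
+// probabilities of the matching row of vp10_palette_color_tree; entries
+// beyond the nodes present in the smaller trees are padding.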
+const vpx_prob vp10_default_palette_y_color_prob
+    [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = {
+      {
+          // 2 colors
+          { 230, 255, 128, 128, 128, 128, 128 },
+          { 214, 255, 128, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 240, 255, 128, 128, 128, 128, 128 },
+          { 73, 255, 128, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 130, 255, 128, 128, 128, 128, 128 },
+          { 227, 255, 128, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 188, 255, 128, 128, 128, 128, 128 },
+          { 75, 255, 128, 128, 128, 128, 128 },
+          { 250, 255, 128, 128, 128, 128, 128 },
+          { 223, 255, 128, 128, 128, 128, 128 },
+          { 252, 255, 128, 128, 128, 128, 128 },
+      },
+      {
+          // 3 colors
+          { 229, 137, 255, 128, 128, 128, 128 },
+          { 197, 120, 255, 128, 128, 128, 128 },
+          { 107, 195, 255, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 27, 151, 255, 128, 128, 128, 128 },
+          { 230, 130, 255, 128, 128, 128, 128 },
+          { 37, 230, 255, 128, 128, 128, 128 },
+          { 67, 221, 255, 128, 128, 128, 128 },
+          { 124, 230, 255, 128, 128, 128, 128 },
+          { 195, 109, 255, 128, 128, 128, 128 },
+          { 99, 122, 255, 128, 128, 128, 128 },
+          { 205, 208, 255, 128, 128, 128, 128 },
+          { 40, 235, 255, 128, 128, 128, 128 },
+          { 251, 132, 255, 128, 128, 128, 128 },
+          { 237, 186, 255, 128, 128, 128, 128 },
+          { 253, 112, 255, 128, 128, 128, 128 },
+      },
+      {
+          // 4 colors
+          { 195, 87, 128, 255, 128, 128, 128 },
+          { 143, 100, 123, 255, 128, 128, 128 },
+          { 94, 124, 119, 255, 128, 128, 128 },
+          { 77, 91, 130, 255, 128, 128, 128 },
+          { 39, 114, 178, 255, 128, 128, 128 },
+          { 222, 94, 125, 255, 128, 128, 128 },
+          { 44, 203, 132, 255, 128, 128, 128 },
+          { 68, 175, 122, 255, 128, 128, 128 },
+          { 110, 187, 124, 255, 128, 128, 128 },
+          { 152, 91, 128, 255, 128, 128, 128 },
+          { 70, 109, 181, 255, 128, 128, 128 },
+          { 133, 113, 164, 255, 128, 128, 128 },
+          { 47, 205, 133, 255, 128, 128, 128 },
+          { 247, 94, 136, 255, 128, 128, 128 },
+          { 205, 122, 146, 255, 128, 128, 128 },
+          { 251, 100, 141, 255, 128, 128, 128 },
+      },
+      {
+          // 5 colors
+          { 195, 65, 84, 125, 255, 128, 128 },
+          { 150, 76, 84, 121, 255, 128, 128 },
+          { 94, 110, 81, 117, 255, 128, 128 },
+          { 79, 85, 91, 139, 255, 128, 128 },
+          { 26, 102, 139, 127, 255, 128, 128 },
+          { 220, 73, 91, 119, 255, 128, 128 },
+          { 38, 203, 86, 127, 255, 128, 128 },
+          { 61, 186, 72, 124, 255, 128, 128 },
+          { 132, 199, 84, 128, 255, 128, 128 },
+          { 172, 52, 62, 120, 255, 128, 128 },
+          { 102, 89, 121, 122, 255, 128, 128 },
+          { 182, 48, 69, 186, 255, 128, 128 },
+          { 36, 206, 87, 126, 255, 128, 128 },
+          { 249, 55, 67, 122, 255, 128, 128 },
+          { 218, 88, 75, 122, 255, 128, 128 },
+          { 253, 64, 80, 119, 255, 128, 128 },
+      },
+      {
+          // 6 colors
+          { 182, 54, 64, 75, 118, 255, 128 },
+          { 126, 67, 70, 76, 116, 255, 128 },
+          { 79, 92, 67, 85, 120, 255, 128 },
+          { 63, 61, 81, 118, 132, 255, 128 },
+          { 21, 80, 105, 83, 119, 255, 128 },
+          { 215, 72, 74, 74, 111, 255, 128 },
+          { 50, 176, 63, 79, 120, 255, 128 },
+          { 72, 148, 66, 77, 120, 255, 128 },
+          { 105, 177, 57, 78, 130, 255, 128 },
+          { 150, 66, 66, 80, 127, 255, 128 },
+          { 81, 76, 109, 85, 116, 255, 128 },
+          { 113, 81, 62, 96, 148, 255, 128 },
+          { 54, 179, 69, 82, 121, 255, 128 },
+          { 244, 47, 48, 67, 118, 255, 128 },
+          { 198, 83, 53, 65, 121, 255, 128 },
+          { 250, 42, 51, 69, 110, 255, 128 },
+      },
+      {
+          // 7 colors
+          { 182, 45, 54, 62, 74, 113, 255 },
+          { 124, 63, 57, 62, 77, 114, 255 },
+          { 77, 80, 56, 66, 76, 117, 255 },
+          { 63, 57, 69, 98, 85, 131, 255 },
+          { 19, 81, 98, 63, 80, 116, 255 },
+          { 215, 56, 60, 63, 68, 105, 255 },
+          { 50, 174, 50, 60, 79, 118, 255 },
+          { 68, 151, 50, 58, 73, 117, 255 },
+          { 104, 182, 53, 57, 79, 127, 255 },
+          { 156, 50, 51, 63, 77, 111, 255 },
+          { 88, 67, 97, 59, 82, 120, 255 },
+          { 114, 81, 46, 65, 103, 132, 255 },
+          { 55, 166, 57, 66, 82, 120, 255 },
+          { 245, 34, 38, 43, 63, 114, 255 },
+          { 203, 68, 45, 47, 60, 118, 255 },
+          { 250, 35, 37, 47, 66, 110, 255 },
+      },
+      {
+          // 8 colors
+          { 180, 43, 46, 50, 56, 69, 109 },
+          { 116, 53, 51, 49, 57, 73, 115 },
+          { 79, 70, 49, 50, 59, 74, 117 },
+          { 60, 54, 57, 70, 62, 83, 129 },
+          { 20, 73, 85, 52, 66, 81, 119 },
+          { 213, 56, 52, 49, 53, 62, 104 },
+          { 48, 161, 41, 45, 56, 77, 116 },
+          { 68, 139, 40, 47, 54, 71, 116 },
+          { 123, 166, 42, 43, 52, 76, 130 },
+          { 153, 44, 44, 47, 54, 79, 129 },
+          { 87, 64, 83, 49, 60, 75, 127 },
+          { 131, 68, 43, 48, 73, 96, 130 },
+          { 55, 152, 45, 51, 64, 77, 113 },
+          { 243, 30, 28, 33, 41, 65, 114 },
+          { 202, 56, 35, 36, 42, 63, 123 },
+          { 249, 31, 29, 32, 45, 68, 111 },
+      }
+    };
+
+const vpx_prob vp10_default_palette_uv_color_prob
+    [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = {
+      {
+          // 2 colors
+          { 228, 255, 128, 128, 128, 128, 128 },
+          { 195, 255, 128, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 228, 255, 128, 128, 128, 128, 128 },
+          { 71, 255, 128, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 129, 255, 128, 128, 128, 128, 128 },
+          { 206, 255, 128, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 136, 255, 128, 128, 128, 128, 128 },
+          { 98, 255, 128, 128, 128, 128, 128 },
+          { 236, 255, 128, 128, 128, 128, 128 },
+          { 222, 255, 128, 128, 128, 128, 128 },
+          { 249, 255, 128, 128, 128, 128, 128 },
+      },
+      {
+          // 3 colors
+          { 198, 136, 255, 128, 128, 128, 128 },
+          { 178, 105, 255, 128, 128, 128, 128 },
+          { 100, 206, 255, 128, 128, 128, 128 },
+          { 128, 128, 128, 128, 128, 128, 128 },
+          { 12, 136, 255, 128, 128, 128, 128 },
+          { 219, 134, 255, 128, 128, 128, 128 },
+          { 50, 198, 255, 128, 128, 128, 128 },
+          { 61, 231, 255, 128, 128, 128, 128 },
+          { 110, 209, 255, 128, 128, 128, 128 },
+          { 173, 106, 255, 128, 128, 128, 128 },
+          { 145, 166, 255, 128, 128, 128, 128 },
+          { 156, 175, 255, 128, 128, 128, 128 },
+          { 69, 183, 255, 128, 128, 128, 128 },
+          { 241, 163, 255, 128, 128, 128, 128 },
+          { 224, 160, 255, 128, 128, 128, 128 },
+          { 246, 154, 255, 128, 128, 128, 128 },
+      },
+      {
+          // 4 colors
+          { 173, 88, 143, 255, 128, 128, 128 },
+          { 146, 81, 127, 255, 128, 128, 128 },
+          { 84, 134, 102, 255, 128, 128, 128 },
+          { 69, 138, 140, 255, 128, 128, 128 },
+          { 31, 103, 200, 255, 128, 128, 128 },
+          { 217, 101, 139, 255, 128, 128, 128 },
+          { 51, 174, 121, 255, 128, 128, 128 },
+          { 64, 177, 109, 255, 128, 128, 128 },
+          { 96, 179, 145, 255, 128, 128, 128 },
+          { 164, 77, 114, 255, 128, 128, 128 },
+          { 87, 94, 156, 255, 128, 128, 128 },
+          { 105, 57, 173, 255, 128, 128, 128 },
+          { 63, 158, 137, 255, 128, 128, 128 },
+          { 236, 102, 156, 255, 128, 128, 128 },
+          { 197, 115, 153, 255, 128, 128, 128 },
+          { 245, 106, 154, 255, 128, 128, 128 },
+      },
+      {
+          // 5 colors
+          { 179, 64, 97, 129, 255, 128, 128 },
+          { 137, 56, 88, 125, 255, 128, 128 },
+          { 82, 107, 61, 118, 255, 128, 128 },
+          { 59, 113, 86, 115, 255, 128, 128 },
+          { 23, 88, 118, 130, 255, 128, 128 },
+          { 213, 66, 90, 125, 255, 128, 128 },
+          { 37, 181, 103, 121, 255, 128, 128 },
+          { 47, 188, 61, 131, 255, 128, 128 },
+          { 104, 185, 103, 144, 255, 128, 128 },
+          { 163, 39, 76, 112, 255, 128, 128 },
+          { 94, 74, 131, 126, 255, 128, 128 },
+          { 142, 42, 103, 163, 255, 128, 128 },
+          { 53, 162, 99, 149, 255, 128, 128 },
+          { 239, 54, 84, 108, 255, 128, 128 },
+          { 203, 84, 110, 147, 255, 128, 128 },
+          { 248, 70, 105, 151, 255, 128, 128 },
+      },
+      {
+          // 6 colors
+          { 189, 50, 67, 90, 130, 255, 128 },
+          { 114, 50, 55, 90, 123, 255, 128 },
+          { 66, 76, 54, 82, 128, 255, 128 },
+          { 43, 69, 69, 80, 129, 255, 128 },
+          { 22, 59, 87, 88, 141, 255, 128 },
+          { 203, 49, 68, 87, 122, 255, 128 },
+          { 43, 157, 74, 104, 146, 255, 128 },
+          { 54, 138, 51, 95, 138, 255, 128 },
+          { 82, 171, 58, 102, 146, 255, 128 },
+          { 129, 38, 59, 64, 168, 255, 128 },
+          { 56, 67, 119, 92, 112, 255, 128 },
+          { 96, 62, 53, 132, 82, 255, 128 },
+          { 60, 147, 77, 108, 145, 255, 128 },
+          { 238, 76, 73, 93, 148, 255, 128 },
+          { 189, 86, 73, 103, 157, 255, 128 },
+          { 246, 62, 75, 83, 167, 255, 128 },
+      },
+      {
+          // 7 colors
+          { 179, 42, 51, 73, 99, 134, 255 },
+          { 119, 52, 52, 61, 64, 114, 255 },
+          { 53, 77, 35, 65, 71, 131, 255 },
+          { 38, 70, 51, 68, 89, 144, 255 },
+          { 23, 65, 128, 73, 97, 131, 255 },
+          { 210, 47, 52, 63, 81, 143, 255 },
+          { 42, 159, 57, 68, 98, 143, 255 },
+          { 49, 153, 45, 82, 93, 143, 255 },
+          { 81, 169, 52, 72, 113, 151, 255 },
+          { 136, 46, 35, 56, 75, 96, 255 },
+          { 57, 84, 109, 47, 107, 131, 255 },
+          { 128, 78, 57, 36, 128, 85, 255 },
+          { 54, 149, 68, 77, 94, 153, 255 },
+          { 243, 58, 50, 71, 81, 167, 255 },
+          { 189, 92, 64, 70, 121, 173, 255 },
+          { 248, 35, 38, 51, 82, 201, 255 },
+      },
+      {
+          // 8 colors
+          { 201, 40, 36, 42, 64, 92, 123 },
+          { 116, 43, 33, 43, 73, 102, 128 },
+          { 46, 77, 37, 69, 62, 78, 150 },
+          { 40, 65, 52, 50, 76, 89, 133 },
+          { 28, 48, 91, 17, 64, 77, 133 },
+          { 218, 43, 43, 37, 56, 72, 163 },
+          { 41, 155, 44, 83, 82, 129, 180 },
+          { 44, 141, 29, 55, 64, 89, 147 },
+          { 92, 166, 48, 45, 59, 126, 179 },
+          { 169, 35, 49, 41, 36, 99, 139 },
+          { 55, 77, 77, 56, 60, 75, 156 },
+          { 155, 81, 51, 64, 57, 182, 255 },
+          { 60, 134, 49, 49, 93, 128, 174 },
+          { 244, 98, 51, 46, 22, 73, 238 },
+          { 189, 70, 40, 87, 93, 79, 201 },
+          { 248, 54, 49, 40, 29, 42, 227 },
+      }
+    };
+
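+// Each entry is the base-11 encoding s0*11^3 + s1*11^2 + s2*11 + s3 of a
+// reachable sorted neighbour score tuple (s0, s1, s2, s3), e.g.
+// (3, 2, 0, 0) -> 3 * 1331 + 2 * 121 = 4235. Scores are sums of the
+// neighbour weights {3, 2, 3, 2}, so each is at most 10 and the encoding is
+// collision-free. See vp10_get_palette_color_context().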
+static const int palette_color_context_lookup[PALETTE_COLOR_CONTEXTS] = {
+  // (3, 0, 0, 0), (3, 2, 0, 0), (3, 3, 2, 0), (3, 3, 2, 2),
+  3993, 4235, 4378, 4380,
+  // (4, 3, 3, 0), (5, 0, 0, 0), (5, 3, 0, 0), (5, 3, 2, 0),
+  5720, 6655, 7018, 7040,
+  // (5, 5, 0, 0), (6, 2, 0, 0), (6, 2, 2, 0), (6, 4, 0, 0),
+  7260, 8228, 8250, 8470,
+  // (7, 3, 0, 0), (8, 0, 0, 0), (8, 2, 0, 0), (10, 0, 0, 0)
+  9680, 10648, 10890, 13310
+};
+
+const vpx_tree_index vp10_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)] = {
+  {
+      // Max tx_size is 8X8
+      -TX_4X4, -TX_8X8,
+  },
+  {
+      // Max tx_size is 16X16
+      -TX_4X4, 2, -TX_8X8, -TX_16X16,
+  },
+  {
+      // Max tx_size is 32X32
+      -TX_4X4, 2, -TX_8X8, 4, -TX_16X16, -TX_32X32,
+  },
+};
+
+static const vpx_prob
+    default_tx_size_prob[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1] = {
+      {
+          // Max tx_size is 8X8
+          { 100 },
+          { 66 },
+      },
+      {
+          // Max tx_size is 16X16
+          { 20, 152 },
+          { 15, 101 },
+      },
+      {
+          // Max tx_size is 32X32
+          { 3, 136, 37 },
+          { 5, 52, 13 },
+      },
+    };
+
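+// Derives the palette color context for the sample at (r, c): weight each
+// palette index by its occurrences among the left, top-left, top and
+// top-right neighbours (weights 3, 2, 3, 2), partially sort the top four
+// scores (reordering color_order to match), then map the base-11 encoding
+// of the sorted scores through palette_color_context_lookup[]. Encodings
+// without an entry there fall back to context 0.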
+int vp10_get_palette_color_context(const uint8_t *color_map, int cols, int r,
+                                   int c, int n, int *color_order) {
+  int i, j, max, max_idx, temp;
+  int scores[PALETTE_MAX_SIZE + 10];
+  int weights[4] = { 3, 2, 3, 2 };
+  int color_ctx = 0;
+  int color_neighbors[4];
+
+  assert(n <= PALETTE_MAX_SIZE);
+
+  if (c - 1 >= 0)
+    color_neighbors[0] = color_map[r * cols + c - 1];
+  else
+    color_neighbors[0] = -1;
+  if (c - 1 >= 0 && r - 1 >= 0)
+    color_neighbors[1] = color_map[(r - 1) * cols + c - 1];
+  else
+    color_neighbors[1] = -1;
+  if (r - 1 >= 0)
+    color_neighbors[2] = color_map[(r - 1) * cols + c];
+  else
+    color_neighbors[2] = -1;
+  if (r - 1 >= 0 && c + 1 <= cols - 1)
+    color_neighbors[3] = color_map[(r - 1) * cols + c + 1];
+  else
+    color_neighbors[3] = -1;
+
+  for (i = 0; i < PALETTE_MAX_SIZE; ++i) color_order[i] = i;
+  memset(scores, 0, PALETTE_MAX_SIZE * sizeof(scores[0]));
+  for (i = 0; i < 4; ++i) {
+    if (color_neighbors[i] >= 0) scores[color_neighbors[i]] += weights[i];
+  }
+
+  for (i = 0; i < 4; ++i) {
+    max = scores[i];
+    max_idx = i;
+    j = i + 1;
+    while (j < n) {
+      if (scores[j] > max) {
+        max = scores[j];
+        max_idx = j;
+      }
+      ++j;
+    }
+
+    if (max_idx != i) {
+      temp = scores[i];
+      scores[i] = scores[max_idx];
+      scores[max_idx] = temp;
+
+      temp = color_order[i];
+      color_order[i] = color_order[max_idx];
+      color_order[max_idx] = temp;
+    }
+  }
+
+  for (i = 0; i < 4; ++i) color_ctx = color_ctx * 11 + scores[i];
+
+  for (i = 0; i < PALETTE_COLOR_CONTEXTS; ++i)
+    if (color_ctx == palette_color_context_lookup[i]) {
+      color_ctx = i;
+      break;
+    }
+
+  if (color_ctx >= PALETTE_COLOR_CONTEXTS) color_ctx = 0;
+
+  return color_ctx;
+}
+
+#if CONFIG_VAR_TX
+static const vpx_prob default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS] = {
+  192, 128, 64, 192, 128, 64, 192, 128, 64,
+};
+#endif
+
+static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = { 192, 128, 64 };
+
+#if CONFIG_EXT_INTERP
+static const vpx_prob default_switchable_interp_prob
+    [SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1] = {
+#if CONFIG_DUAL_FILTER
+      { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 },
+      { 36, 243, 48, 128 },   { 34, 16, 128, 128 },  { 149, 160, 128, 128 },
+
+      { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 },
+      { 36, 243, 48, 128 },   { 34, 16, 128, 128 },  { 149, 160, 128, 128 },
+
+      { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 },
+      { 36, 243, 48, 128 },   { 34, 16, 128, 128 },  { 149, 160, 128, 128 },
+
+      { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 },
+      { 36, 243, 48, 128 },   { 34, 16, 128, 128 },  { 149, 160, 128, 128 },
+#else
+      { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 },
+      { 36, 243, 48, 128 },   { 34, 16, 128, 128 },  { 149, 160, 128, 128 },
+#endif  // CONFIG_DUAL_FILTER
+    };
+#else  // CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER
+static const vpx_prob default_switchable_interp_prob
+    [SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1] = {
+      { 235, 162 }, { 36, 255 }, { 34, 3 }, { 149, 144 },
+
+      { 235, 162 }, { 36, 255 }, { 34, 3 }, { 10, 3 },
+
+      { 235, 162 }, { 36, 255 }, { 34, 3 }, { 149, 144 },
+
+      { 235, 162 }, { 36, 255 }, { 34, 3 }, { 10, 3 },
+    };
+#else
+static const vpx_prob default_switchable_interp_prob
+    [SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1] = {
+      { 235, 162 }, { 36, 255 }, { 34, 3 }, { 149, 144 },
+    };
+#endif  // CONFIG_DUAL_FILTER
+#endif  // CONFIG_EXT_INTERP
+
+#if CONFIG_EXT_TX
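+// Each extended-tx "set" restricts which of the TX_TYPES may be signalled
+// for a block; the arrays below give one coding tree per set. Entry 0 is a
+// placeholder for the unused set (see the TODOs).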
+/* clang-format off */
+const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER]
+                                           [TREE_SIZE(TX_TYPES)] = {
+  { // TODO(yaowu): remove unused entry 0.
+    0
+  }, {
+    -IDTX, 2,
+    4, 14,
+    6, 8,
+    -V_DCT, -H_DCT,
+    10, 12,
+    -V_ADST, -H_ADST,
+    -V_FLIPADST, -H_FLIPADST,
+    -DCT_DCT, 16,
+    18, 24,
+    20, 22,
+    -ADST_DCT, -DCT_ADST,
+    -FLIPADST_DCT, -DCT_FLIPADST,
+    26, 28,
+    -ADST_ADST, -FLIPADST_FLIPADST,
+    -ADST_FLIPADST, -FLIPADST_ADST
+  }, {
+    -IDTX, 2,
+    4, 6,
+    -V_DCT, -H_DCT,
+    -DCT_DCT, 8,
+    10, 16,
+    12, 14,
+    -ADST_DCT, -DCT_ADST,
+    -FLIPADST_DCT, -DCT_FLIPADST,
+    18, 20,
+    -ADST_ADST, -FLIPADST_FLIPADST,
+    -ADST_FLIPADST, -FLIPADST_ADST
+  }, {
+    -IDTX, -DCT_DCT,
+  }
+};
+
+const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA]
+                                           [TREE_SIZE(TX_TYPES)] = {
+  {  // TODO(yaowu): remove unused entry 0.
+    0
+  }, {
+    -IDTX, 2,
+    -DCT_DCT, 4,
+    6, 8,
+    -V_DCT, -H_DCT,
+    -ADST_ADST, 10,
+    -ADST_DCT, -DCT_ADST,
+  }, {
+    -IDTX, 2,
+    -DCT_DCT, 4,
+    -ADST_ADST, 6,
+    -ADST_DCT, -DCT_ADST,
+  }
+};
+/* clang-format on */
+
+static const vpx_prob
+    default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = {
+      {
+          // TODO(yaowu): remove unused entry 0.
+          { 0 },
+          { 0 },
+          { 0 },
+#if EXT_TX_SIZES == 4
+          { 0 },
+#endif
+      },
+      {
+          { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
+            128 },
+          { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
+            128 },
+          { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
+            128 },
+#if EXT_TX_SIZES == 4
+          { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
+            128 },
+#endif
+      },
+      {
+          { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+          { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+          { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+#if EXT_TX_SIZES == 4
+          { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+#endif
+      },
+      {
+          { 12 },
+          { 12 },
+          { 12 },
+#if EXT_TX_SIZES == 4
+          { 12 },
+#endif
+      }
+    };
+
+static const vpx_prob default_intra_ext_tx_prob
+    [EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][TX_TYPES - 1] = {
+      {
+          // TODO(yaowu): remove unused entry 0.
+          {
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+          },
+          {
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+          },
+          {
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+#if EXT_TX_SIZES == 4
+          },
+          {
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+#endif
+          },
+      },
+      {
+          {
+              { 8, 224, 32, 128, 64, 128 },
+              { 10, 32, 32, 128, 16, 192 },
+              { 10, 32, 32, 128, 16, 64 },
+              { 9, 200, 32, 128, 64, 128 },
+              { 8, 8, 32, 128, 224, 128 },
+              { 10, 32, 32, 128, 16, 192 },
+              { 10, 32, 32, 128, 16, 64 },
+              { 10, 23, 32, 128, 80, 176 },
+              { 10, 23, 32, 128, 80, 176 },
+              { 10, 32, 32, 128, 16, 64 },
+          },
+          {
+              { 8, 224, 32, 128, 64, 128 },
+              { 10, 32, 32, 128, 16, 192 },
+              { 10, 32, 32, 128, 16, 64 },
+              { 9, 200, 32, 128, 64, 128 },
+              { 8, 8, 32, 128, 224, 128 },
+              { 10, 32, 32, 128, 16, 192 },
+              { 10, 32, 32, 128, 16, 64 },
+              { 10, 23, 32, 128, 80, 176 },
+              { 10, 23, 32, 128, 80, 176 },
+              { 10, 32, 32, 128, 16, 64 },
+          },
+          {
+              { 8, 224, 32, 128, 64, 128 },
+              { 10, 32, 32, 128, 16, 192 },
+              { 10, 32, 32, 128, 16, 64 },
+              { 9, 200, 32, 128, 64, 128 },
+              { 8, 8, 32, 128, 224, 128 },
+              { 10, 32, 32, 128, 16, 192 },
+              { 10, 32, 32, 128, 16, 64 },
+              { 10, 23, 32, 128, 80, 176 },
+              { 10, 23, 32, 128, 80, 176 },
+              { 10, 32, 32, 128, 16, 64 },
+#if EXT_TX_SIZES == 4
+          },
+          {
+              { 8, 224, 32, 128, 64, 128 },
+              { 10, 32, 32, 128, 16, 192 },
+              { 10, 32, 32, 128, 16, 64 },
+              { 9, 200, 32, 128, 64, 128 },
+              { 8, 8, 32, 128, 224, 128 },
+              { 10, 32, 32, 128, 16, 192 },
+              { 10, 32, 32, 128, 16, 64 },
+              { 10, 23, 32, 128, 80, 176 },
+              { 10, 23, 32, 128, 80, 176 },
+              { 10, 32, 32, 128, 16, 64 },
+#endif
+          },
+      },
+      {
+          {
+              { 8, 224, 64, 128 },
+              { 10, 32, 16, 192 },
+              { 10, 32, 16, 64 },
+              { 9, 200, 64, 128 },
+              { 8, 8, 224, 128 },
+              { 10, 32, 16, 192 },
+              { 10, 32, 16, 64 },
+              { 10, 23, 80, 176 },
+              { 10, 23, 80, 176 },
+              { 10, 32, 16, 64 },
+          },
+          {
+              { 8, 224, 64, 128 },
+              { 10, 32, 16, 192 },
+              { 10, 32, 16, 64 },
+              { 9, 200, 64, 128 },
+              { 8, 8, 224, 128 },
+              { 10, 32, 16, 192 },
+              { 10, 32, 16, 64 },
+              { 10, 23, 80, 176 },
+              { 10, 23, 80, 176 },
+              { 10, 32, 16, 64 },
+          },
+          {
+              { 8, 224, 64, 128 },
+              { 10, 32, 16, 192 },
+              { 10, 32, 16, 64 },
+              { 9, 200, 64, 128 },
+              { 8, 8, 224, 128 },
+              { 10, 32, 16, 192 },
+              { 10, 32, 16, 64 },
+              { 10, 23, 80, 176 },
+              { 10, 23, 80, 176 },
+              { 10, 32, 16, 64 },
+#if EXT_TX_SIZES == 4
+          },
+          {
+              { 8, 224, 64, 128 },
+              { 10, 32, 16, 192 },
+              { 10, 32, 16, 64 },
+              { 9, 200, 64, 128 },
+              { 8, 8, 224, 128 },
+              { 10, 32, 16, 192 },
+              { 10, 32, 16, 64 },
+              { 10, 23, 80, 176 },
+              { 10, 23, 80, 176 },
+              { 10, 32, 16, 64 },
+#endif
+          },
+      },
+    };
+
+#else
+
+/* clang-format off */
+const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)] = {
+  -DCT_DCT, 2,
+  -ADST_ADST, 4,
+  -ADST_DCT, -DCT_ADST
+};
+/* clang-format on */
+
+static const vpx_prob
+    default_intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1] = {
+      { { 240, 85, 128 }, { 4, 1, 248 }, { 4, 1, 8 }, { 4, 248, 128 } },
+      { { 244, 85, 128 }, { 8, 2, 248 }, { 8, 2, 8 }, { 8, 248, 128 } },
+      { { 248, 85, 128 }, { 16, 4, 248 }, { 16, 4, 8 }, { 16, 248, 128 } },
+    };
+
+static const vpx_prob default_inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1] = {
+  { 160, 85, 128 }, { 176, 85, 128 }, { 192, 85, 128 },
+};
+#endif  // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA
+static const vpx_prob
+    default_intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1] = {
+      { 98, 63, 60 }, { 98, 82, 80 }, { 94, 65, 103 },
+      { 49, 25, 24 }, { 72, 38, 50 },
+    };
+static const vpx_prob default_ext_intra_probs[2] = { 230, 230 };
+
+const vpx_tree_index vp10_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)] = {
+  -INTRA_FILTER_LINEAR,      2, -INTRA_FILTER_8TAP, 4, -INTRA_FILTER_8TAP_SHARP,
+  -INTRA_FILTER_8TAP_SMOOTH,
+};
+#endif  // CONFIG_EXT_INTRA
+
+#if CONFIG_SUPERTX
+static const vpx_prob
+    default_supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES] = {
+      { 1, 160, 160, 170 }, { 1, 200, 200, 210 },
+    };
+#endif  // CONFIG_SUPERTX
+
+// FIXME(someone): need real defaults here
+static const struct segmentation_probs default_seg_probs = {
+  { 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128 },
+};
+
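+// Populates a frame context with the default mode probability tables
+// defined above.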
+static void init_mode_probs(FRAME_CONTEXT *fc) {
+  vp10_copy(fc->uv_mode_prob, default_uv_probs);
+  vp10_copy(fc->y_mode_prob, default_if_y_probs);
+  vp10_copy(fc->switchable_interp_prob, default_switchable_interp_prob);
+  vp10_copy(fc->partition_prob, default_partition_probs);
+  vp10_copy(fc->intra_inter_prob, default_intra_inter_p);
+  vp10_copy(fc->comp_inter_prob, default_comp_inter_p);
+  vp10_copy(fc->comp_ref_prob, default_comp_ref_p);
+#if CONFIG_EXT_REFS
+  vp10_copy(fc->comp_bwdref_prob, default_comp_bwdref_p);
+#endif  // CONFIG_EXT_REFS
+  vp10_copy(fc->single_ref_prob, default_single_ref_p);
+  vp10_copy(fc->tx_size_probs, default_tx_size_prob);
+#if CONFIG_VAR_TX
+  vp10_copy(fc->txfm_partition_prob, default_txfm_partition_probs);
+#endif
+  vp10_copy(fc->skip_probs, default_skip_probs);
+#if CONFIG_REF_MV
+  vp10_copy(fc->newmv_prob, default_newmv_prob);
+  vp10_copy(fc->zeromv_prob, default_zeromv_prob);
+  vp10_copy(fc->refmv_prob, default_refmv_prob);
+  vp10_copy(fc->drl_prob, default_drl_prob);
+#if CONFIG_EXT_INTER
+  fc->new2mv_prob = default_new2mv_prob;
+#endif  // CONFIG_EXT_INTER
+#endif  // CONFIG_REF_MV
+  vp10_copy(fc->inter_mode_probs, default_inter_mode_probs);
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+  vp10_copy(fc->motvar_prob, default_motvar_prob);
+#endif  // CONFIG_OBMC || CONFIG_WARPED_MOTION
+#if CONFIG_EXT_INTER
+  vp10_copy(fc->inter_compound_mode_probs, default_inter_compound_mode_probs);
+  vp10_copy(fc->interintra_prob, default_interintra_prob);
+  vp10_copy(fc->interintra_mode_prob, default_interintra_mode_prob);
+  vp10_copy(fc->wedge_interintra_prob, default_wedge_interintra_prob);
+  vp10_copy(fc->wedge_interinter_prob, default_wedge_interinter_prob);
+#endif  // CONFIG_EXT_INTER
+#if CONFIG_SUPERTX
+  vp10_copy(fc->supertx_prob, default_supertx_prob);
+#endif  // CONFIG_SUPERTX
+  vp10_copy(fc->seg.tree_probs, default_seg_probs.tree_probs);
+  vp10_copy(fc->seg.pred_probs, default_seg_probs.pred_probs);
+#if CONFIG_EXT_INTRA
+  vp10_copy(fc->ext_intra_probs, default_ext_intra_probs);
+  vp10_copy(fc->intra_filter_probs, default_intra_filter_probs);
+#endif  // CONFIG_EXT_INTRA
+  vp10_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob);
+  vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob);
+}
+
+#if CONFIG_EXT_INTERP
+const vpx_tree_index
+    vp10_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)] = {
+      -EIGHTTAP_REGULAR,
+      2,
+      4,
+      6,
+      -EIGHTTAP_SMOOTH,
+      -EIGHTTAP_SMOOTH2,
+      -MULTITAP_SHARP,
+      -MULTITAP_SHARP2,
+    };
+#else
+const vpx_tree_index vp10_switchable_interp_tree[TREE_SIZE(
+    SWITCHABLE_FILTERS)] = { -EIGHTTAP_REGULAR, 2, -EIGHTTAP_SMOOTH,
+                             -MULTITAP_SHARP };
+#endif  // CONFIG_EXT_INTERP
+
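+// Backward adaptation after an inter frame: merge each probability from the
+// previous frame context with the symbol counts gathered while coding this
+// frame, writing the result into cm->fc for use by subsequent frames.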
+void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) {
+  int i, j;
+  FRAME_CONTEXT *fc = cm->fc;
+  const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
+  const FRAME_COUNTS *counts = &cm->counts;
+
+  for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
+    fc->intra_inter_prob[i] = vp10_mode_mv_merge_probs(
+        pre_fc->intra_inter_prob[i], counts->intra_inter[i]);
+  for (i = 0; i < COMP_INTER_CONTEXTS; i++)
+    fc->comp_inter_prob[i] = vp10_mode_mv_merge_probs(
+        pre_fc->comp_inter_prob[i], counts->comp_inter[i]);
+
+#if CONFIG_EXT_REFS
+  for (i = 0; i < REF_CONTEXTS; i++)
+    for (j = 0; j < (FWD_REFS - 1); j++)
+      fc->comp_ref_prob[i][j] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i][j],
+                                                    counts->comp_ref[i][j]);
+  for (i = 0; i < REF_CONTEXTS; i++)
+    for (j = 0; j < (BWD_REFS - 1); j++)
+      fc->comp_bwdref_prob[i][j] = mode_mv_merge_probs(
+          pre_fc->comp_bwdref_prob[i][j], counts->comp_bwdref[i][j]);
+#else
+  for (i = 0; i < REF_CONTEXTS; i++)
+    for (j = 0; j < (COMP_REFS - 1); j++)
+      fc->comp_ref_prob[i][j] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i][j],
+                                                    counts->comp_ref[i][j]);
+#endif  // CONFIG_EXT_REFS
+
+  for (i = 0; i < REF_CONTEXTS; i++)
+    for (j = 0; j < (SINGLE_REFS - 1); j++)
+      fc->single_ref_prob[i][j] = vp10_mode_mv_merge_probs(
+          pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]);
+
+#if CONFIG_REF_MV
+  for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
+    fc->newmv_prob[i] =
+        vp10_mode_mv_merge_probs(pre_fc->newmv_prob[i], counts->newmv_mode[i]);
+  for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i)
+    fc->zeromv_prob[i] = vp10_mode_mv_merge_probs(pre_fc->zeromv_prob[i],
+                                                  counts->zeromv_mode[i]);
+  for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
+    fc->refmv_prob[i] =
+        vp10_mode_mv_merge_probs(pre_fc->refmv_prob[i], counts->refmv_mode[i]);
+
+  for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+    fc->drl_prob[i] =
+        vp10_mode_mv_merge_probs(pre_fc->drl_prob[i], counts->drl_mode[i]);
+#if CONFIG_EXT_INTER
+  fc->new2mv_prob =
+      vp10_mode_mv_merge_probs(pre_fc->new2mv_prob, counts->new2mv_mode);
+#endif  // CONFIG_EXT_INTER
+#else
+  for (i = 0; i < INTER_MODE_CONTEXTS; i++)
+    vpx_tree_merge_probs(vp10_inter_mode_tree, pre_fc->inter_mode_probs[i],
+                         counts->inter_mode[i], fc->inter_mode_probs[i]);
+#endif  // CONFIG_REF_MV
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+  for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i)
+    vpx_tree_merge_probs(vp10_motvar_tree, pre_fc->motvar_prob[i],
+                         counts->motvar[i], fc->motvar_prob[i]);
+#endif  // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+#if CONFIG_SUPERTX
+  for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
+    int j;
+    for (j = 1; j < TX_SIZES; ++j) {
+      fc->supertx_prob[i][j] = vp10_mode_mv_merge_probs(
+          pre_fc->supertx_prob[i][j], counts->supertx[i][j]);
+    }
+  }
+#endif  // CONFIG_SUPERTX
+
+#if CONFIG_EXT_INTER
+  for (i = 0; i < INTER_MODE_CONTEXTS; i++)
+    vpx_tree_merge_probs(
+        vp10_inter_compound_mode_tree, pre_fc->inter_compound_mode_probs[i],
+        counts->inter_compound_mode[i], fc->inter_compound_mode_probs[i]);
+  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
+    if (is_interintra_allowed_bsize_group(i))
+      fc->interintra_prob[i] = vp10_mode_mv_merge_probs(
+          pre_fc->interintra_prob[i], counts->interintra[i]);
+  }
+  for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
+    vpx_tree_merge_probs(
+        vp10_interintra_mode_tree, pre_fc->interintra_mode_prob[i],
+        counts->interintra_mode[i], fc->interintra_mode_prob[i]);
+  }
+  for (i = 0; i < BLOCK_SIZES; ++i) {
+    if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i))
+      fc->wedge_interintra_prob[i] = vp10_mode_mv_merge_probs(
+          pre_fc->wedge_interintra_prob[i], counts->wedge_interintra[i]);
+  }
+  for (i = 0; i < BLOCK_SIZES; ++i) {
+    if (is_interinter_wedge_used(i))
+      fc->wedge_interinter_prob[i] = vp10_mode_mv_merge_probs(
+          pre_fc->wedge_interinter_prob[i], counts->wedge_interinter[i]);
+  }
+#endif  // CONFIG_EXT_INTER
+
+  for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
+    vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->y_mode_prob[i],
+                         counts->y_mode[i], fc->y_mode_prob[i]);
+
+  if (cm->interp_filter == SWITCHABLE) {
+    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
+      vpx_tree_merge_probs(
+          vp10_switchable_interp_tree, pre_fc->switchable_interp_prob[i],
+          counts->switchable_interp[i], fc->switchable_interp_prob[i]);
+  }
+}
+
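+// As above, but for tx size, skip, extended tx types, segmentation, UV
+// mode, partition and intra filter probabilities.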
+void vp10_adapt_intra_frame_probs(VP10_COMMON *cm) {
+  int i, j;
+  FRAME_CONTEXT *fc = cm->fc;
+  const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
+  const FRAME_COUNTS *counts = &cm->counts;
+
+  if (cm->tx_mode == TX_MODE_SELECT) {
+    for (i = 0; i < TX_SIZES - 1; ++i) {
+      for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
+        vpx_tree_merge_probs(vp10_tx_size_tree[i], pre_fc->tx_size_probs[i][j],
+                             counts->tx_size[i][j], fc->tx_size_probs[i][j]);
+    }
+  }
+
+#if CONFIG_VAR_TX
+  if (cm->tx_mode == TX_MODE_SELECT)
+    for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
+      fc->txfm_partition_prob[i] = vp10_mode_mv_merge_probs(
+          pre_fc->txfm_partition_prob[i], counts->txfm_partition[i]);
+#endif
+
+  for (i = 0; i < SKIP_CONTEXTS; ++i)
+    fc->skip_probs[i] =
+        vp10_mode_mv_merge_probs(pre_fc->skip_probs[i], counts->skip[i]);
+
+#if CONFIG_EXT_TX
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    int s;
+    for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+      if (use_inter_ext_tx_for_txsize[s][i]) {
+        vpx_tree_merge_probs(
+            vp10_ext_tx_inter_tree[s], pre_fc->inter_ext_tx_prob[s][i],
+            counts->inter_ext_tx[s][i], fc->inter_ext_tx_prob[s][i]);
+      }
+    }
+    for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+      if (use_intra_ext_tx_for_txsize[s][i]) {
+        int j;
+        for (j = 0; j < INTRA_MODES; ++j)
+          vpx_tree_merge_probs(
+              vp10_ext_tx_intra_tree[s], pre_fc->intra_ext_tx_prob[s][i][j],
+              counts->intra_ext_tx[s][i][j], fc->intra_ext_tx_prob[s][i][j]);
+      }
+    }
+  }
+#else
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    for (j = 0; j < TX_TYPES; ++j)
+      vpx_tree_merge_probs(vp10_ext_tx_tree, pre_fc->intra_ext_tx_prob[i][j],
+                           counts->intra_ext_tx[i][j],
+                           fc->intra_ext_tx_prob[i][j]);
+  }
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    vpx_tree_merge_probs(vp10_ext_tx_tree, pre_fc->inter_ext_tx_prob[i],
+                         counts->inter_ext_tx[i], fc->inter_ext_tx_prob[i]);
+  }
+#endif  // CONFIG_EXT_TX
+
+  if (cm->seg.temporal_update) {
+    for (i = 0; i < PREDICTION_PROBS; i++)
+      fc->seg.pred_probs[i] = vp10_mode_mv_merge_probs(
+          pre_fc->seg.pred_probs[i], counts->seg.pred[i]);
+
+    vpx_tree_merge_probs(vp10_segment_tree, pre_fc->seg.tree_probs,
+                         counts->seg.tree_mispred, fc->seg.tree_probs);
+  } else {
+    vpx_tree_merge_probs(vp10_segment_tree, pre_fc->seg.tree_probs,
+                         counts->seg.tree_total, fc->seg.tree_probs);
+  }
+
+  for (i = 0; i < INTRA_MODES; ++i)
+    vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->uv_mode_prob[i],
+                         counts->uv_mode[i], fc->uv_mode_prob[i]);
+
+#if CONFIG_EXT_PARTITION_TYPES
+  vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[0],
+                       counts->partition[0], fc->partition_prob[0]);
+  for (i = 1; i < PARTITION_CONTEXTS; i++)
+    vpx_tree_merge_probs(vp10_ext_partition_tree, pre_fc->partition_prob[i],
+                         counts->partition[i], fc->partition_prob[i]);
+#else
+  for (i = 0; i < PARTITION_CONTEXTS; i++)
+    vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i],
+                         counts->partition[i], fc->partition_prob[i]);
+#endif  // CONFIG_EXT_PARTITION_TYPES
+
+#if CONFIG_EXT_INTRA
+  for (i = 0; i < PLANE_TYPES; ++i) {
+    fc->ext_intra_probs[i] = vp10_mode_mv_merge_probs(
+        pre_fc->ext_intra_probs[i], counts->ext_intra[i]);
+  }
+
+  for (i = 0; i < INTRA_FILTERS + 1; ++i)
+    vpx_tree_merge_probs(vp10_intra_filter_tree, pre_fc->intra_filter_probs[i],
+                         counts->intra_filter[i], fc->intra_filter_probs[i]);
+#endif  // CONFIG_EXT_INTRA
+}
+
+static void set_default_lf_deltas(struct loopfilter *lf) {
+  lf->mode_ref_delta_enabled = 1;
+  lf->mode_ref_delta_update = 1;
+
+  lf->ref_deltas[INTRA_FRAME] = 1;
+  lf->ref_deltas[LAST_FRAME] = 0;
+#if CONFIG_EXT_REFS
+  lf->ref_deltas[LAST2_FRAME] = lf->ref_deltas[LAST_FRAME];
+  lf->ref_deltas[LAST3_FRAME] = lf->ref_deltas[LAST_FRAME];
+  lf->ref_deltas[BWDREF_FRAME] = lf->ref_deltas[LAST_FRAME];
+#endif  // CONFIG_EXT_REFS
+  lf->ref_deltas[GOLDEN_FRAME] = -1;
+  lf->ref_deltas[ALTREF_FRAME] = -1;
+
+  lf->mode_deltas[0] = 0;
+  lf->mode_deltas[1] = 0;
+}
+
+void vp10_setup_past_independence(VP10_COMMON *cm) {
+  // Reset the segment feature data to the defaults: features disabled,
+  // values zeroed, with delta coding (the default state).
+  struct loopfilter *const lf = &cm->lf;
+
+  int i;
+  vp10_clearall_segfeatures(&cm->seg);
+  cm->seg.abs_delta = SEGMENT_DELTADATA;
+
+  if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
+    memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
+
+  if (cm->current_frame_seg_map)
+    memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
+
+  // Reset the mode/ref deltas for the loop filter.
+  vp10_zero(lf->last_ref_deltas);
+  vp10_zero(lf->last_mode_deltas);
+  set_default_lf_deltas(lf);
+
+  // Force an update of the sharpness.
+  lf->last_sharpness_level = -1;
+#if CONFIG_LOOP_RESTORATION
+  if (cm->rst_info.bilateral_level) {
+    for (i = 0; i < cm->rst_internal.ntiles; ++i)
+      cm->rst_info.bilateral_level[i] = -1;
+  }
+#endif  // CONFIG_LOOP_RESTORATION
+
+  vp10_default_coef_probs(cm);
+  init_mode_probs(cm->fc);
+  vp10_init_mv_probs(cm);
+  cm->fc->initialized = 1;
+
+  if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
+      cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL) {
+    // Reset all frame contexts.
+    for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
+  } else if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT) {
+    // Reset only the frame context specified in the frame header.
+    cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+  }
+
+  // prev_mip is only allocated in the encoder.
+  if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode)
+    memset(cm->prev_mip, 0,
+           cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip));
+
+  cm->frame_context_idx = 0;
+}
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
new file mode 100644
index 0000000..4616aa2
--- /dev/null
+++ b/av1/common/entropymode.h
@@ -0,0 +1,280 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ENTROPYMODE_H_
+#define VP10_COMMON_ENTROPYMODE_H_
+
+#include "av1/common/entropy.h"
+#include "av1/common/entropymv.h"
+#include "av1/common/filter.h"
+#include "av1/common/seg_common.h"
+#include "aom_dsp/vpx_filter.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BLOCK_SIZE_GROUPS 4
+
+#define TX_SIZE_CONTEXTS 2
+
+#define INTER_OFFSET(mode) ((mode)-NEARESTMV)
+#if CONFIG_EXT_INTER
+#define INTER_COMPOUND_OFFSET(mode) ((mode)-NEAREST_NEARESTMV)
+#endif  // CONFIG_EXT_INTER
+
+#define PALETTE_COLOR_CONTEXTS 16
+#define PALETTE_MAX_SIZE 8
+#define PALETTE_BLOCK_SIZES (BLOCK_LARGEST - BLOCK_8X8 + 1)
+#define PALETTE_Y_MODE_CONTEXTS 3
+#define PALETTE_MAX_BLOCK_SIZE (64 * 64)
+
+struct VP10Common;
+
+struct seg_counts {
+  unsigned int tree_total[MAX_SEGMENTS];
+  unsigned int tree_mispred[MAX_SEGMENTS];
+  unsigned int pred[PREDICTION_PROBS][2];
+};
+
+typedef struct frame_contexts {
+  vpx_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
+  vpx_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
+#if CONFIG_EXT_PARTITION_TYPES
+  vpx_prob partition_prob[PARTITION_CONTEXTS][EXT_PARTITION_TYPES - 1];
+#else
+  vpx_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1];
+#endif
+  vp10_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES];
+#if CONFIG_ANS
+  coeff_cdf_model coef_cdfs[TX_SIZES][PLANE_TYPES];
+#endif
+  vpx_prob
+      switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS -
+                                                         1];
+
+#if CONFIG_REF_MV
+  vpx_prob newmv_prob[NEWMV_MODE_CONTEXTS];
+  vpx_prob zeromv_prob[ZEROMV_MODE_CONTEXTS];
+  vpx_prob refmv_prob[REFMV_MODE_CONTEXTS];
+  vpx_prob drl_prob[DRL_MODE_CONTEXTS];
+
+#if CONFIG_EXT_INTER
+  vpx_prob new2mv_prob;
+#endif  // CONFIG_EXT_INTER
+#endif  // CONFIG_REF_MV
+
+  vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
+#if CONFIG_EXT_INTER
+  vpx_prob
+      inter_compound_mode_probs[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES - 1];
+  vpx_prob interintra_prob[BLOCK_SIZE_GROUPS];
+  vpx_prob interintra_mode_prob[BLOCK_SIZE_GROUPS][INTERINTRA_MODES - 1];
+  vpx_prob wedge_interintra_prob[BLOCK_SIZES];
+  vpx_prob wedge_interinter_prob[BLOCK_SIZES];
+#endif  // CONFIG_EXT_INTER
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+  vpx_prob motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1];
+#endif  // CONFIG_OBMC || CONFIG_WARPED_MOTION
+  vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
+  vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS];
+  vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS - 1];
+#if CONFIG_EXT_REFS
+  vpx_prob comp_ref_prob[REF_CONTEXTS][FWD_REFS - 1];
+  vpx_prob comp_bwdref_prob[REF_CONTEXTS][BWD_REFS - 1];
+#else
+  vpx_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS - 1];
+#endif  // CONFIG_EXT_REFS
+  vpx_prob tx_size_probs[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1];
+#if CONFIG_VAR_TX
+  vpx_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
+#endif
+  vpx_prob skip_probs[SKIP_CONTEXTS];
+#if CONFIG_REF_MV
+  nmv_context nmvc[NMV_CONTEXTS];
+#else
+  nmv_context nmvc;
+#endif
+  int initialized;
+#if CONFIG_EXT_TX
+  vpx_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1];
+  vpx_prob
+      intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][TX_TYPES -
+                                                                      1];
+#else
+  vpx_prob intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1];
+  vpx_prob inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1];
+#endif  // CONFIG_EXT_TX
+#if CONFIG_SUPERTX
+  vpx_prob supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES];
+#endif  // CONFIG_SUPERTX
+  struct segmentation_probs seg;
+#if CONFIG_EXT_INTRA
+  vpx_prob ext_intra_probs[PLANE_TYPES];
+  vpx_prob intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1];
+#endif  // CONFIG_EXT_INTRA
+#if CONFIG_GLOBAL_MOTION
+  vpx_prob global_motion_types_prob[GLOBAL_MOTION_TYPES - 1];
+#endif  // CONFIG_GLOBAL_MOTION
+} FRAME_CONTEXT;
+
+typedef struct FRAME_COUNTS {
+  // Note: This structure should only contain 'unsigned int' fields, or
+  // aggregates built solely from 'unsigned int' fields/elements
+  unsigned int kf_y_mode[INTRA_MODES][INTRA_MODES][INTRA_MODES];
+  unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
+  unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
+#if CONFIG_EXT_PARTITION_TYPES
+  unsigned int partition[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
+#else
+  unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES];
+#endif
+  vp10_coeff_count_model coef[TX_SIZES][PLANE_TYPES];
+  unsigned int
+      eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS][COEFF_CONTEXTS];
+  unsigned int
+      switchable_interp[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
+#if CONFIG_REF_MV
+  unsigned int newmv_mode[NEWMV_MODE_CONTEXTS][2];
+  unsigned int zeromv_mode[ZEROMV_MODE_CONTEXTS][2];
+  unsigned int refmv_mode[REFMV_MODE_CONTEXTS][2];
+  unsigned int drl_mode[DRL_MODE_CONTEXTS][2];
+#if CONFIG_EXT_INTER
+  unsigned int new2mv_mode[2];
+#endif  // CONFIG_EXT_INTER
+#endif
+
+  unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES];
+#if CONFIG_EXT_INTER
+  unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
+  unsigned int interintra[BLOCK_SIZE_GROUPS][2];
+  unsigned int interintra_mode[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
+  unsigned int wedge_interintra[BLOCK_SIZES][2];
+  unsigned int wedge_interinter[BLOCK_SIZES][2];
+#endif  // CONFIG_EXT_INTER
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+  unsigned int motvar[BLOCK_SIZES][MOTION_VARIATIONS];
+#endif  // CONFIG_OBMC || CONFIG_WARPED_MOTION
+  unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
+  unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
+  unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS - 1][2];
+#if CONFIG_EXT_REFS
+  unsigned int comp_ref[REF_CONTEXTS][FWD_REFS - 1][2];
+  unsigned int comp_bwdref[REF_CONTEXTS][BWD_REFS - 1][2];
+#else
+  unsigned int comp_ref[REF_CONTEXTS][COMP_REFS - 1][2];
+#endif  // CONFIG_EXT_REFS
+  // TODO(any): tx_size_totals is only used by the encoder to decide whether
+  // to use forward updates for the coeff probs, and as such it does not
+  // really belong in this structure.
+  unsigned int tx_size_totals[TX_SIZES];
+  unsigned int tx_size[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
+#if CONFIG_VAR_TX
+  unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
+#endif
+  unsigned int skip[SKIP_CONTEXTS][2];
+#if CONFIG_REF_MV
+  nmv_context_counts mv[NMV_CONTEXTS];
+#else
+  nmv_context_counts mv;
+#endif
+#if CONFIG_EXT_TX
+#if CONFIG_RECT_TX
+  unsigned int tx_size_implied[TX_SIZES][TX_SIZES];
+#endif  // CONFIG_RECT_TX
+  unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
+  unsigned int
+      intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][TX_TYPES];
+#else
+  unsigned int intra_ext_tx[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
+  unsigned int inter_ext_tx[EXT_TX_SIZES][TX_TYPES];
+#endif  // CONFIG_EXT_TX
+#if CONFIG_SUPERTX
+  unsigned int supertx[PARTITION_SUPERTX_CONTEXTS][TX_SIZES][2];
+  unsigned int supertx_size[TX_SIZES];
+#endif  // CONFIG_SUPERTX
+  struct seg_counts seg;
+#if CONFIG_EXT_INTRA
+  unsigned int ext_intra[PLANE_TYPES][2];
+  unsigned int intra_filter[INTRA_FILTERS + 1][INTRA_FILTERS];
+#endif  // CONFIG_EXT_INTRA
+} FRAME_COUNTS;
+
+extern const vpx_prob
+    vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1];
+extern const vpx_prob vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES]
+                                                      [PALETTE_Y_MODE_CONTEXTS];
+extern const vpx_prob vp10_default_palette_uv_mode_prob[2];
+extern const vpx_prob
+    vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1];
+extern const vpx_prob
+    vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1];
+extern const vpx_prob vp10_default_palette_y_color_prob
+    [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1];
+extern const vpx_prob vp10_default_palette_uv_color_prob
+    [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1];
+
+extern const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)];
+extern const vpx_tree_index vp10_inter_mode_tree[TREE_SIZE(INTER_MODES)];
+#if CONFIG_EXT_INTER
+extern const vpx_tree_index
+    vp10_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)];
+extern const vpx_tree_index
+    vp10_inter_compound_mode_tree[TREE_SIZE(INTER_COMPOUND_MODES)];
+#endif  // CONFIG_EXT_INTER
+extern const vpx_tree_index vp10_partition_tree[TREE_SIZE(PARTITION_TYPES)];
+#if CONFIG_EXT_PARTITION_TYPES
+extern const vpx_tree_index
+    vp10_ext_partition_tree[TREE_SIZE(EXT_PARTITION_TYPES)];
+#endif
+extern const vpx_tree_index
+    vp10_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)];
+extern const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)];
+extern const vpx_tree_index
+    vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)];
+extern const vpx_tree_index
+    vp10_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)];
+#if CONFIG_EXT_INTRA
+extern const vpx_tree_index vp10_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)];
+#endif  // CONFIG_EXT_INTRA
+#if CONFIG_EXT_TX
+extern const vpx_tree_index
+    vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER][TREE_SIZE(TX_TYPES)];
+extern const vpx_tree_index
+    vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA][TREE_SIZE(TX_TYPES)];
+#else
+extern const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)];
+#endif  // CONFIG_EXT_TX
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+extern const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)];
+#endif  // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+void vp10_setup_past_independence(struct VP10Common *cm);
+
+void vp10_adapt_intra_frame_probs(struct VP10Common *cm);
+void vp10_adapt_inter_frame_probs(struct VP10Common *cm);
+
+static INLINE int vp10_ceil_log2(int n) {
+  int i = 1, p = 2;
+  while (p < n) {
+    i++;
+    p = p << 1;
+  }
+  return i;
+}
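+
+// vp10_ceil_log2(n) returns ceil(log2(n)) for n >= 2 (e.g. 3 for n = 8 and
+// 4 for n = 9); note that it also returns 1 for n <= 1.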
+
+int vp10_get_palette_color_context(const uint8_t *color_map, int cols, int r,
+                                   int c, int n, int *color_order);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_ENTROPYMODE_H_
diff --git a/av1/common/entropymv.c b/av1/common/entropymv.c
new file mode 100644
index 0000000..f3dba3f
--- /dev/null
+++ b/av1/common/entropymv.c
@@ -0,0 +1,282 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/onyxc_int.h"
+#include "av1/common/entropymv.h"
+
+// Integer-pel reference MV threshold for use of high-precision 1/8-pel MVs.
+#define COMPANDED_MVREF_THRESH 8
+
+const vpx_tree_index vp10_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = {
+  -MV_JOINT_ZERO, 2, -MV_JOINT_HNZVZ, 4, -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ
+};
+
+/* clang-format off */
+const vpx_tree_index vp10_mv_class_tree[TREE_SIZE(MV_CLASSES)] = {
+  -MV_CLASS_0, 2,
+  -MV_CLASS_1, 4,
+  6, 8,
+  -MV_CLASS_2, -MV_CLASS_3,
+  10, 12,
+  -MV_CLASS_4, -MV_CLASS_5,
+  -MV_CLASS_6, 14,
+  16, 18,
+  -MV_CLASS_7, -MV_CLASS_8,
+  -MV_CLASS_9, -MV_CLASS_10,
+};
+/* clang-format on */
+
+const vpx_tree_index vp10_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = {
+  -0, -1,
+};
+
+const vpx_tree_index vp10_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2,  -1,
+                                                                4,  -2, -3 };
+
+static const nmv_context default_nmv_context = {
+#if CONFIG_REF_MV
+  { 1, 64, 96 },
+  128,
+#else
+  { 32, 64, 96 },
+#endif
+  { {
+        // Vertical component
+        128,                                                   // sign
+        { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 },  // class
+        { 216 },                                               // class0
+        { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 },  // bits
+        { { 128, 128, 64 }, { 96, 112, 64 } },                 // class0_fp
+        { 64, 96, 64 },                                        // fp
+        160,                                                   // class0_hp bit
+        128,                                                   // hp
+    },
+    {
+        // Horizontal component
+        128,                                                   // sign
+        { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 },  // class
+        { 208 },                                               // class0
+        { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 },  // bits
+        { { 128, 128, 64 }, { 96, 112, 64 } },                 // class0_fp
+        { 64, 96, 64 },                                        // fp
+        160,                                                   // class0_hp bit
+        128,                                                   // hp
+    } },
+};
+
+static const uint8_t log_in_base_2[] = {
+  0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7,
+  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10
+};
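+
+// log_in_base_2[v] is floor(log2(v)) for v in [1, 1024] (entry 0 is also 0).
+// It is indexed with z >> 3, i.e. the integer-pel part of a 1/8-pel
+// magnitude, to find the motion vector class.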
+
+#if CONFIG_GLOBAL_MOTION
+const vpx_tree_index
+    vp10_global_motion_types_tree[TREE_SIZE(GLOBAL_MOTION_TYPES)] = {
+      -GLOBAL_ZERO, 2, -GLOBAL_TRANSLATION, 4, -GLOBAL_ROTZOOM, -GLOBAL_AFFINE
+    };
+
+static const vpx_prob default_global_motion_types_prob[GLOBAL_MOTION_TYPES -
+                                                       1] = { 224, 128, 128 };
+#endif  // CONFIG_GLOBAL_MOTION
+
+static INLINE int mv_class_base(MV_CLASS_TYPE c) {
+  return c ? CLASS0_SIZE << (c + 2) : 0;
+}
+
+MV_CLASS_TYPE vp10_get_mv_class(int z, int *offset) {
+  const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096)
+                              ? MV_CLASS_10
+                              : (MV_CLASS_TYPE)log_in_base_2[z >> 3];
+  if (offset) *offset = z - mv_class_base(c);
+  return c;
+}
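+
+// For example, z = 100 (a magnitude of 101 in 1/8-pel units) gives
+// log_in_base_2[100 >> 3] = 3, so the class is MV_CLASS_3 and the offset
+// within the class is 100 - mv_class_base(3) = 100 - 64 = 36.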
+
+// TODO(jingning): This idle function is intentionally left as-is for
+// experimental purposes.
+int vp10_use_mv_hp(const MV *ref) {
+  (void)ref;
+  return 1;
+}
+
+static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr,
+                             int usehp) {
+  int s, z, c, o, d, e, f;
+  assert(v != 0); /* should not be zero */
+  s = v < 0;
+  comp_counts->sign[s] += incr;
+  z = (s ? -v : v) - 1; /* magnitude - 1 */
+
+  c = vp10_get_mv_class(z, &o);
+  comp_counts->classes[c] += incr;
+
+  d = (o >> 3);     /* int mv data */
+  f = (o >> 1) & 3; /* fractional pel mv data */
+  e = (o & 1);      /* high precision mv data */
+
+  if (c == MV_CLASS_0) {
+    comp_counts->class0[d] += incr;
+    comp_counts->class0_fp[d][f] += incr;
+    if (usehp) comp_counts->class0_hp[e] += incr;
+  } else {
+    int i;
+    int b = c + CLASS0_BITS - 1;  // number of bits
+    for (i = 0; i < b; ++i) comp_counts->bits[i][((d >> i) & 1)] += incr;
+    comp_counts->fp[f] += incr;
+    if (usehp) comp_counts->hp[e] += incr;
+  }
+}
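+
+// Continuing the example above, an offset of o = 36 within its class splits
+// into d = 36 >> 3 = 4 (integer-pel bits), f = (36 >> 1) & 3 = 2 (1/4-pel
+// fraction) and e = 36 & 1 = 0 (1/8-pel high-precision bit); for MV_CLASS_3,
+// d is counted bit-by-bit over b = 3 + CLASS0_BITS - 1 = 3 bit positions.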
+
+void vp10_inc_mv(const MV *mv, nmv_context_counts *counts, const int usehp) {
+  if (counts != NULL) {
+    const MV_JOINT_TYPE j = vp10_get_mv_joint(mv);
+
+#if CONFIG_REF_MV
+    ++counts->zero_rmv[j == MV_JOINT_ZERO];
+    if (j == MV_JOINT_ZERO) return;
+#endif
+    ++counts->joints[j];
+
+    if (mv_joint_vertical(j))
+      inc_mv_component(mv->row, &counts->comps[0], 1, usehp);
+
+    if (mv_joint_horizontal(j))
+      inc_mv_component(mv->col, &counts->comps[1], 1, usehp);
+  }
+}
+
+void vp10_adapt_mv_probs(VP10_COMMON *cm, int allow_hp) {
+  int i, j;
+#if CONFIG_REF_MV
+  int idx;
+  for (idx = 0; idx < NMV_CONTEXTS; ++idx) {
+    nmv_context *fc = &cm->fc->nmvc[idx];
+    const nmv_context *pre_fc =
+        &cm->frame_contexts[cm->frame_context_idx].nmvc[idx];
+    const nmv_context_counts *counts = &cm->counts.mv[idx];
+
+    vpx_tree_merge_probs(vp10_mv_joint_tree, pre_fc->joints, counts->joints,
+                         fc->joints);
+#if CONFIG_REF_MV
+    fc->zero_rmv = vp10_mode_mv_merge_probs(pre_fc->zero_rmv, counts->zero_rmv);
+#endif
+
+    for (i = 0; i < 2; ++i) {
+      nmv_component *comp = &fc->comps[i];
+      const nmv_component *pre_comp = &pre_fc->comps[i];
+      const nmv_component_counts *c = &counts->comps[i];
+
+      comp->sign = vp10_mode_mv_merge_probs(pre_comp->sign, c->sign);
+      vpx_tree_merge_probs(vp10_mv_class_tree, pre_comp->classes, c->classes,
+                           comp->classes);
+      vpx_tree_merge_probs(vp10_mv_class0_tree, pre_comp->class0, c->class0,
+                           comp->class0);
+
+      for (j = 0; j < MV_OFFSET_BITS; ++j)
+        comp->bits[j] = vp10_mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]);
+
+      for (j = 0; j < CLASS0_SIZE; ++j)
+        vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->class0_fp[j],
+                             c->class0_fp[j], comp->class0_fp[j]);
+
+      vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
+
+      if (allow_hp) {
+        comp->class0_hp =
+            vp10_mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp);
+        comp->hp = vp10_mode_mv_merge_probs(pre_comp->hp, c->hp);
+      }
+    }
+  }
+#else
+  nmv_context *fc = &cm->fc->nmvc;
+  const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc;
+  const nmv_context_counts *counts = &cm->counts.mv;
+
+  vpx_tree_merge_probs(vp10_mv_joint_tree, pre_fc->joints, counts->joints,
+                       fc->joints);
+
+  for (i = 0; i < 2; ++i) {
+    nmv_component *comp = &fc->comps[i];
+    const nmv_component *pre_comp = &pre_fc->comps[i];
+    const nmv_component_counts *c = &counts->comps[i];
+
+    comp->sign = vp10_mode_mv_merge_probs(pre_comp->sign, c->sign);
+    vpx_tree_merge_probs(vp10_mv_class_tree, pre_comp->classes, c->classes,
+                         comp->classes);
+    vpx_tree_merge_probs(vp10_mv_class0_tree, pre_comp->class0, c->class0,
+                         comp->class0);
+
+    for (j = 0; j < MV_OFFSET_BITS; ++j)
+      comp->bits[j] = vp10_mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]);
+
+    for (j = 0; j < CLASS0_SIZE; ++j)
+      vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->class0_fp[j],
+                           c->class0_fp[j], comp->class0_fp[j]);
+
+    vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
+
+    if (allow_hp) {
+      comp->class0_hp =
+          vp10_mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp);
+      comp->hp = vp10_mode_mv_merge_probs(pre_comp->hp, c->hp);
+    }
+  }
+#endif
+}
+
+void vp10_init_mv_probs(VP10_COMMON *cm) {
+#if CONFIG_REF_MV
+  int i;
+  for (i = 0; i < NMV_CONTEXTS; ++i) cm->fc->nmvc[i] = default_nmv_context;
+#else
+  cm->fc->nmvc = default_nmv_context;
+#endif
+#if CONFIG_GLOBAL_MOTION
+  vp10_copy(cm->fc->global_motion_types_prob, default_global_motion_types_prob);
+#endif  // CONFIG_GLOBAL_MOTION
+}
diff --git a/av1/common/entropymv.h b/av1/common/entropymv.h
new file mode 100644
index 0000000..c809a67
--- /dev/null
+++ b/av1/common/entropymv.h
@@ -0,0 +1,143 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ENTROPYMV_H_
+#define VP10_COMMON_ENTROPYMV_H_
+
+#include "./vpx_config.h"
+
+#include "aom_dsp/prob.h"
+
+#include "av1/common/mv.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Common;
+
+void vp10_init_mv_probs(struct VP10Common *cm);
+
+void vp10_adapt_mv_probs(struct VP10Common *cm, int usehp);
+int vp10_use_mv_hp(const MV *ref);
+
+#define MV_UPDATE_PROB 252
+
+/* Symbols for coding which components are zero jointly */
+#define MV_JOINTS 4
+typedef enum {
+  MV_JOINT_ZERO = 0,   /* Zero vector */
+  MV_JOINT_HNZVZ = 1,  /* Vert zero, hor nonzero */
+  MV_JOINT_HZVNZ = 2,  /* Hor zero, vert nonzero */
+  MV_JOINT_HNZVNZ = 3, /* Both components nonzero */
+} MV_JOINT_TYPE;
+
+static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) {
+  return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ;
+}
+
+static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) {
+  return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ;
+}
+
+/* Symbols for coding magnitude class of nonzero components */
+#define MV_CLASSES 11
+typedef enum {
+  MV_CLASS_0 = 0,   /* (0, 2]     integer pel */
+  MV_CLASS_1 = 1,   /* (2, 4]     integer pel */
+  MV_CLASS_2 = 2,   /* (4, 8]     integer pel */
+  MV_CLASS_3 = 3,   /* (8, 16]    integer pel */
+  MV_CLASS_4 = 4,   /* (16, 32]   integer pel */
+  MV_CLASS_5 = 5,   /* (32, 64]   integer pel */
+  MV_CLASS_6 = 6,   /* (64, 128]  integer pel */
+  MV_CLASS_7 = 7,   /* (128, 256] integer pel */
+  MV_CLASS_8 = 8,   /* (256, 512] integer pel */
+  MV_CLASS_9 = 9,   /* (512, 1024] integer pel */
+  MV_CLASS_10 = 10, /* (1024, 2048] integer pel */
+} MV_CLASS_TYPE;
+
+#define CLASS0_BITS 1 /* bits at integer precision for class 0 */
+#define CLASS0_SIZE (1 << CLASS0_BITS)
+#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2)
+#define MV_FP_SIZE 4
+
+#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2)
+#define MV_MAX ((1 << MV_MAX_BITS) - 1)
+#define MV_VALS ((MV_MAX << 1) + 1)
+
+#define MV_IN_USE_BITS 14
+#define MV_UPP ((1 << MV_IN_USE_BITS) - 1)
+#define MV_LOW (-(1 << MV_IN_USE_BITS))
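+
+// With MV_CLASSES = 11 and CLASS0_BITS = 1, this yields MV_OFFSET_BITS = 10,
+// MV_MAX_BITS = 14, MV_MAX = 16383 and MV_VALS = 32767; MV_UPP and MV_LOW
+// clamp usable motion vectors to [-16384, 16383] in 1/8-pel units.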
+
+extern const vpx_tree_index vp10_mv_joint_tree[];
+extern const vpx_tree_index vp10_mv_class_tree[];
+extern const vpx_tree_index vp10_mv_class0_tree[];
+extern const vpx_tree_index vp10_mv_fp_tree[];
+
+typedef struct {
+  vpx_prob sign;
+  vpx_prob classes[MV_CLASSES - 1];
+  vpx_prob class0[CLASS0_SIZE - 1];
+  vpx_prob bits[MV_OFFSET_BITS];
+  vpx_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1];
+  vpx_prob fp[MV_FP_SIZE - 1];
+  vpx_prob class0_hp;
+  vpx_prob hp;
+} nmv_component;
+
+typedef struct {
+  vpx_prob joints[MV_JOINTS - 1];
+#if CONFIG_REF_MV
+  vpx_prob zero_rmv;
+#endif
+  nmv_component comps[2];
+} nmv_context;
+
+static INLINE MV_JOINT_TYPE vp10_get_mv_joint(const MV *mv) {
+  if (mv->row == 0) {
+    return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ;
+  } else {
+    return mv->col == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ;
+  }
+}
+
+MV_CLASS_TYPE vp10_get_mv_class(int z, int *offset);
+
+typedef struct {
+  unsigned int sign[2];
+  unsigned int classes[MV_CLASSES];
+  unsigned int class0[CLASS0_SIZE];
+  unsigned int bits[MV_OFFSET_BITS][2];
+  unsigned int class0_fp[CLASS0_SIZE][MV_FP_SIZE];
+  unsigned int fp[MV_FP_SIZE];
+  unsigned int class0_hp[2];
+  unsigned int hp[2];
+} nmv_component_counts;
+
+typedef struct {
+  unsigned int joints[MV_JOINTS];
+#if CONFIG_REF_MV
+  unsigned int zero_rmv[2];
+#endif
+  nmv_component_counts comps[2];
+} nmv_context_counts;
+
+void vp10_inc_mv(const MV *mv, nmv_context_counts *mvctx, const int usehp);
+
+#if CONFIG_GLOBAL_MOTION
+extern const vpx_tree_index
+    vp10_global_motion_types_tree[TREE_SIZE(GLOBAL_MOTION_TYPES)];
+#endif  // CONFIG_GLOBAL_MOTION
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_ENTROPYMV_H_
diff --git a/av1/common/enums.h b/av1/common/enums.h
new file mode 100644
index 0000000..8cdec8e
--- /dev/null
+++ b/av1/common/enums.h
@@ -0,0 +1,438 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ENUMS_H_
+#define VP10_COMMON_ENUMS_H_
+
+#include "./vpx_config.h"
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#undef MAX_SB_SIZE
+
+// Max superblock size
+#if CONFIG_EXT_PARTITION
+#define MAX_SB_SIZE_LOG2 7
+#else
+#define MAX_SB_SIZE_LOG2 6
+#endif  // CONFIG_EXT_PARTITION
+#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2)
+#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
+
+// Min superblock size
+#define MIN_SB_SIZE_LOG2 6
+
+// Pixels per Mode Info (MI) unit
+#define MI_SIZE_LOG2 3
+#define MI_SIZE (1 << MI_SIZE_LOG2)
+
+// MI-units per max superblock (MI Block - MIB)
+#define MAX_MIB_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2)
+#define MAX_MIB_SIZE (1 << MAX_MIB_SIZE_LOG2)
+
+// MI-units per min superblock
+#define MIN_MIB_SIZE_LOG2 (MIN_SB_SIZE_LOG2 - MI_SIZE_LOG2)
+
+// Mask to extract MI offset within max MIB
+#define MAX_MIB_MASK (MAX_MIB_SIZE - 1)
+#define MAX_MIB_MASK_2 (MAX_MIB_SIZE * 2 - 1)
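+
+// For example, with CONFIG_EXT_PARTITION the maximum superblock is 128x128
+// pixels, i.e. 16x16 MI units (MAX_MIB_SIZE = 1 << (7 - 3)); otherwise it is
+// 64x64 pixels, i.e. 8x8 MI units. MAX_MIB_MASK extracts the MI offset
+// within such a superblock.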
+
+// Maximum number of tile rows and tile columns
+#if CONFIG_EXT_TILE
+#define MAX_TILE_ROWS 1024
+#define MAX_TILE_COLS 1024
+#else
+#define MAX_TILE_ROWS 4
+#define MAX_TILE_COLS 64
+#endif  // CONFIG_EXT_TILE
+
+// Bitstream profiles indicated by 2-3 bits in the uncompressed header.
+// 00: Profile 0.  8-bit 4:2:0 only.
+// 10: Profile 1.  8-bit 4:4:4, 4:2:2, and 4:4:0.
+// 01: Profile 2.  10-bit and 12-bit color only, with 4:2:0 sampling.
+// 110: Profile 3. 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0
+//                 sampling.
+// 111: Undefined profile.
+typedef enum BITSTREAM_PROFILE {
+  PROFILE_0,
+  PROFILE_1,
+  PROFILE_2,
+  PROFILE_3,
+  MAX_PROFILES
+} BITSTREAM_PROFILE;
+
+#define BLOCK_4X4 0
+#define BLOCK_4X8 1
+#define BLOCK_8X4 2
+#define BLOCK_8X8 3
+#define BLOCK_8X16 4
+#define BLOCK_16X8 5
+#define BLOCK_16X16 6
+#define BLOCK_16X32 7
+#define BLOCK_32X16 8
+#define BLOCK_32X32 9
+#define BLOCK_32X64 10
+#define BLOCK_64X32 11
+#define BLOCK_64X64 12
+#if !CONFIG_EXT_PARTITION
+#define BLOCK_SIZES 13
+#else
+#define BLOCK_64X128 13
+#define BLOCK_128X64 14
+#define BLOCK_128X128 15
+#define BLOCK_SIZES 16
+#endif  // !CONFIG_EXT_PARTITION
+#define BLOCK_INVALID BLOCK_SIZES
+#define BLOCK_LARGEST (BLOCK_SIZES - 1)
+typedef uint8_t BLOCK_SIZE;
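+
+// Hence BLOCK_LARGEST is BLOCK_64X64 (12) by default and BLOCK_128X128 (15)
+// when CONFIG_EXT_PARTITION is enabled.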
+
+#if CONFIG_EXT_PARTITION_TYPES
+typedef enum PARTITION_TYPE {
+  PARTITION_NONE,
+  PARTITION_HORZ,
+  PARTITION_VERT,
+  PARTITION_SPLIT,
+  PARTITION_HORZ_A,  // HORZ split and the top partition is split again
+  PARTITION_HORZ_B,  // HORZ split and the bottom partition is split again
+  PARTITION_VERT_A,  // VERT split and the left partition is split again
+  PARTITION_VERT_B,  // VERT split and the right partition is split again
+  EXT_PARTITION_TYPES,
+  PARTITION_TYPES = PARTITION_SPLIT + 1,
+  PARTITION_INVALID = EXT_PARTITION_TYPES
+} PARTITION_TYPE;
+#else
+typedef enum PARTITION_TYPE {
+  PARTITION_NONE,
+  PARTITION_HORZ,
+  PARTITION_VERT,
+  PARTITION_SPLIT,
+  PARTITION_TYPES,
+  PARTITION_INVALID = PARTITION_TYPES
+} PARTITION_TYPE;
+#endif  // CONFIG_EXT_PARTITION_TYPES
+
+typedef char PARTITION_CONTEXT;
+#define PARTITION_PLOFFSET 4  // number of probability models per block size
+#if CONFIG_EXT_PARTITION
+#define PARTITION_CONTEXTS (5 * PARTITION_PLOFFSET)
+#else
+#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
+#endif  // CONFIG_EXT_PARTITION
+
+// block transform size
+typedef uint8_t TX_SIZE;
+#define TX_4X4 ((TX_SIZE)0)    // 4x4 transform
+#define TX_8X8 ((TX_SIZE)1)    // 8x8 transform
+#define TX_16X16 ((TX_SIZE)2)  // 16x16 transform
+#define TX_32X32 ((TX_SIZE)3)  // 32x32 transform
+#define TX_SIZES ((TX_SIZE)4)
+
+#if CONFIG_EXT_TX
+#define TX_4X8 ((TX_SIZE)4)         // 4x8 transform
+#define TX_8X4 ((TX_SIZE)5)         // 8x4 transform
+#define TX_8X16 ((TX_SIZE)6)        // 8x16 transform
+#define TX_16X8 ((TX_SIZE)7)        // 16x8 transform
+#define TX_16X32 ((TX_SIZE)8)       // 16x32 transform
+#define TX_32X16 ((TX_SIZE)9)       // 32x16 transform
+#define TX_SIZES_ALL ((TX_SIZE)10)  // Includes rectangular transforms
+#else
+#define TX_SIZES_ALL ((TX_SIZE)4)
+#endif  // CONFIG_EXT_TX
+
+#define MAX_TX_SIZE_LOG2 5
+#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
+#define MIN_TX_SIZE_LOG2 2
+#define MIN_TX_SIZE (1 << MIN_TX_SIZE_LOG2)
+#define MAX_TX_SQUARE (MAX_TX_SIZE * MAX_TX_SIZE)
+
+// Number of maximum-size transform blocks in the maximum-size superblock
+#define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 ((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2)
+#define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2)
+
+#define MAX_NUM_TXB (1 << (MAX_SB_SIZE_LOG2 - MIN_TX_SIZE_LOG2))
+
+// frame transform mode
+typedef enum {
+  ONLY_4X4 = 0,        // only 4x4 transform used
+  ALLOW_8X8 = 1,       // allow block transform size up to 8x8
+  ALLOW_16X16 = 2,     // allow block transform size up to 16x16
+  ALLOW_32X32 = 3,     // allow block transform size up to 32x32
+  TX_MODE_SELECT = 4,  // transform specified for each block
+  TX_MODES = 5,
+} TX_MODE;
+
+// 1D tx types
+typedef enum {
+  DCT_1D = 0,
+  ADST_1D = 1,
+  FLIPADST_1D = 2,
+  IDTX_1D = 3,
+  TX_TYPES_1D = 4,
+} TX_TYPE_1D;
+
+typedef enum {
+  DCT_DCT = 0,    // DCT  in both horizontal and vertical
+  ADST_DCT = 1,   // ADST in vertical, DCT in horizontal
+  DCT_ADST = 2,   // DCT  in vertical, ADST in horizontal
+  ADST_ADST = 3,  // ADST in both directions
+#if CONFIG_EXT_TX
+  FLIPADST_DCT = 4,
+  DCT_FLIPADST = 5,
+  FLIPADST_FLIPADST = 6,
+  ADST_FLIPADST = 7,
+  FLIPADST_ADST = 8,
+  IDTX = 9,
+  V_DCT = 10,
+  H_DCT = 11,
+  V_ADST = 12,
+  H_ADST = 13,
+  V_FLIPADST = 14,
+  H_FLIPADST = 15,
+#endif  // CONFIG_EXT_TX
+  TX_TYPES,
+} TX_TYPE;
+
+#if CONFIG_EXT_TX
+#define EXT_TX_SIZES 4       // number of sizes that use extended transforms
+#define EXT_TX_SETS_INTER 4  // Sets of transform selections for INTER
+#define EXT_TX_SETS_INTRA 3  // Sets of transform selections for INTRA
+#else
+#define EXT_TX_SIZES 3  // number of sizes that use extended transforms
+#endif                  // CONFIG_EXT_TX
+
+typedef enum {
+  VPX_LAST_FLAG = 1 << 0,
+#if CONFIG_EXT_REFS
+  VPX_LAST2_FLAG = 1 << 1,
+  VPX_LAST3_FLAG = 1 << 2,
+  VPX_GOLD_FLAG = 1 << 3,
+  VPX_BWD_FLAG = 1 << 4,
+  VPX_ALT_FLAG = 1 << 5,
+  VPX_REFFRAME_ALL = (1 << 6) - 1
+#else
+  VPX_GOLD_FLAG = 1 << 1,
+  VPX_ALT_FLAG = 1 << 2,
+  VPX_REFFRAME_ALL = (1 << 3) - 1
+#endif  // CONFIG_EXT_REFS
+} VPX_REFFRAME;
+
+typedef enum { PLANE_TYPE_Y = 0, PLANE_TYPE_UV = 1, PLANE_TYPES } PLANE_TYPE;
+
+typedef enum {
+  TWO_COLORS,
+  THREE_COLORS,
+  FOUR_COLORS,
+  FIVE_COLORS,
+  SIX_COLORS,
+  SEVEN_COLORS,
+  EIGHT_COLORS,
+  PALETTE_SIZES
+} PALETTE_SIZE;
+
+typedef enum {
+  PALETTE_COLOR_ONE,
+  PALETTE_COLOR_TWO,
+  PALETTE_COLOR_THREE,
+  PALETTE_COLOR_FOUR,
+  PALETTE_COLOR_FIVE,
+  PALETTE_COLOR_SIX,
+  PALETTE_COLOR_SEVEN,
+  PALETTE_COLOR_EIGHT,
+  PALETTE_COLORS
+} PALETTE_COLOR;
+
+#define DC_PRED 0    // Average of above and left pixels
+#define V_PRED 1     // Vertical
+#define H_PRED 2     // Horizontal
+#define D45_PRED 3   // Directional 45  deg = round(arctan(1/1) * 180/pi)
+#define D135_PRED 4  // Directional 135 deg = 180 - 45
+#define D117_PRED 5  // Directional 117 deg = 180 - 63
+#define D153_PRED 6  // Directional 153 deg = 180 - 27
+#define D207_PRED 7  // Directional 207 deg = 180 + 27
+#define D63_PRED 8   // Directional 63  deg = round(arctan(2/1) * 180/pi)
+#define TM_PRED 9    // True-motion
+#define NEARESTMV 10
+#define NEARMV 11
+#define ZEROMV 12
+#define NEWMV 13
+#if CONFIG_EXT_INTER
+#define NEWFROMNEARMV 14
+#define NEAREST_NEARESTMV 15
+#define NEAREST_NEARMV 16
+#define NEAR_NEARESTMV 17
+#define NEAR_NEARMV 18
+#define NEAREST_NEWMV 19
+#define NEW_NEARESTMV 20
+#define NEAR_NEWMV 21
+#define NEW_NEARMV 22
+#define ZERO_ZEROMV 23
+#define NEW_NEWMV 24
+#define MB_MODE_COUNT 25
+#else
+#define MB_MODE_COUNT 14
+#endif  // CONFIG_EXT_INTER
+typedef uint8_t PREDICTION_MODE;
+
+#define INTRA_MODES (TM_PRED + 1)
+
+typedef enum {
+  SIMPLE_TRANSLATION = 0,
+#if CONFIG_OBMC
+  OBMC_CAUSAL,  // 2-sided OBMC
+#endif          // CONFIG_OBMC
+#if CONFIG_WARPED_MOTION
+  WARPED_CAUSAL,  // 2-sided WARPED
+#endif            // CONFIG_WARPED_MOTION
+  MOTION_VARIATIONS
+} MOTION_VARIATION;
+
+#if CONFIG_EXT_INTER
+typedef enum {
+  II_DC_PRED = 0,
+  II_V_PRED,
+  II_H_PRED,
+  II_D45_PRED,
+  II_D135_PRED,
+  II_D117_PRED,
+  II_D153_PRED,
+  II_D207_PRED,
+  II_D63_PRED,
+  II_TM_PRED,
+  INTERINTRA_MODES
+} INTERINTRA_MODE;
+
+#endif  // CONFIG_EXT_INTER
+
+#if CONFIG_EXT_INTRA
+typedef enum {
+  FILTER_DC_PRED,
+  FILTER_V_PRED,
+  FILTER_H_PRED,
+  FILTER_D45_PRED,
+  FILTER_D135_PRED,
+  FILTER_D117_PRED,
+  FILTER_D153_PRED,
+  FILTER_D207_PRED,
+  FILTER_D63_PRED,
+  FILTER_TM_PRED,
+  EXT_INTRA_MODES,
+} EXT_INTRA_MODE;
+
+#define FILTER_INTRA_MODES (FILTER_TM_PRED + 1)
+#define DIRECTIONAL_MODES (INTRA_MODES - 2)
+#endif  // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTER
+#define INTER_MODES (1 + NEWFROMNEARMV - NEARESTMV)
+#else
+#define INTER_MODES (1 + NEWMV - NEARESTMV)
+#endif  // CONFIG_EXT_INTER
+
+#if CONFIG_EXT_INTER
+#define INTER_COMPOUND_MODES (1 + NEW_NEWMV - NEAREST_NEARESTMV)
+#endif  // CONFIG_EXT_INTER
+
+#define SKIP_CONTEXTS 3
+
+#if CONFIG_REF_MV
+#define NMV_CONTEXTS 3
+
+#define NEWMV_MODE_CONTEXTS 7
+#define ZEROMV_MODE_CONTEXTS 2
+#define REFMV_MODE_CONTEXTS 9
+#define DRL_MODE_CONTEXTS 5
+
+#define ZEROMV_OFFSET 3
+#define REFMV_OFFSET 4
+
+#define NEWMV_CTX_MASK ((1 << ZEROMV_OFFSET) - 1)
+#define ZEROMV_CTX_MASK ((1 << (REFMV_OFFSET - ZEROMV_OFFSET)) - 1)
+#define REFMV_CTX_MASK ((1 << (8 - REFMV_OFFSET)) - 1)
+
+#define ALL_ZERO_FLAG_OFFSET 8
+#define SKIP_NEARESTMV_OFFSET 9
+#define SKIP_NEARMV_OFFSET 10
+#define SKIP_NEARESTMV_SUB8X8_OFFSET 11
+#endif
+
+#define INTER_MODE_CONTEXTS 7
+
+/* Segment Feature Masks */
+#define MAX_MV_REF_CANDIDATES 2
+
+#if CONFIG_REF_MV
+#define MAX_REF_MV_STACK_SIZE 16
+#if CONFIG_EXT_PARTITION
+#define REF_CAT_LEVEL 640
+#else
+#define REF_CAT_LEVEL 160
+#endif  // CONFIG_EXT_PARTITION
+#endif  // CONFIG_REF_MV
+
+#define INTRA_INTER_CONTEXTS 4
+#define COMP_INTER_CONTEXTS 5
+#define REF_CONTEXTS 5
+
+#if CONFIG_VAR_TX
+#define TXFM_PARTITION_CONTEXTS 9
+typedef TX_SIZE TXFM_CONTEXT;
+#endif
+
+#define NONE -1
+#define INTRA_FRAME 0
+#define LAST_FRAME 1
+
+#if CONFIG_EXT_REFS
+#define LAST2_FRAME 2
+#define LAST3_FRAME 3
+#define GOLDEN_FRAME 4
+#define BWDREF_FRAME 5
+#define ALTREF_FRAME 6
+#define LAST_REF_FRAMES (LAST3_FRAME - LAST_FRAME + 1)
+#else
+#define GOLDEN_FRAME 2
+#define ALTREF_FRAME 3
+#endif  // CONFIG_EXT_REFS
+
+#define INTER_REFS_PER_FRAME (ALTREF_FRAME - LAST_FRAME + 1)
+#define TOTAL_REFS_PER_FRAME (ALTREF_FRAME - INTRA_FRAME + 1)
+
+#define FWD_REFS (GOLDEN_FRAME - LAST_FRAME + 1)
+#define FWD_RF_OFFSET(ref) (ref - LAST_FRAME)
+#if CONFIG_EXT_REFS
+#define BWD_REFS (ALTREF_FRAME - BWDREF_FRAME + 1)
+#define BWD_RF_OFFSET(ref) (ref - BWDREF_FRAME)
+#else
+#define BWD_REFS 1
+#define BWD_RF_OFFSET(ref) (ref - ALTREF_FRAME)
+#endif  // CONFIG_EXT_REFS
+
+#define SINGLE_REFS (FWD_REFS + BWD_REFS)
+#define COMP_REFS (FWD_REFS * BWD_REFS)
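+
+// With CONFIG_EXT_REFS this gives FWD_REFS = 4, BWD_REFS = 2, SINGLE_REFS = 6
+// and COMP_REFS = 8; without it, FWD_REFS = 2, BWD_REFS = 1, SINGLE_REFS = 3
+// and COMP_REFS = 2.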
+
+#if CONFIG_REF_MV
+#define MODE_CTX_REF_FRAMES (TOTAL_REFS_PER_FRAME + COMP_REFS)
+#else
+#define MODE_CTX_REF_FRAMES TOTAL_REFS_PER_FRAME
+#endif
+
+#if CONFIG_SUPERTX
+#define PARTITION_SUPERTX_CONTEXTS 2
+#define MAX_SUPERTX_BLOCK_SIZE BLOCK_32X32
+#endif  // CONFIG_SUPERTX
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_ENUMS_H_
diff --git a/av1/common/filter.c b/av1/common/filter.c
new file mode 100644
index 0000000..46eca5d
--- /dev/null
+++ b/av1/common/filter.c
@@ -0,0 +1,302 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/filter.h"
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+                bilinear_filters[SUBPEL_SHIFTS]) = {
+  { 0, 0, 0, 128, 0, 0, 0, 0 },  { 0, 0, 0, 120, 8, 0, 0, 0 },
+  { 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 },
+  { 0, 0, 0, 96, 32, 0, 0, 0 },  { 0, 0, 0, 88, 40, 0, 0, 0 },
+  { 0, 0, 0, 80, 48, 0, 0, 0 },  { 0, 0, 0, 72, 56, 0, 0, 0 },
+  { 0, 0, 0, 64, 64, 0, 0, 0 },  { 0, 0, 0, 56, 72, 0, 0, 0 },
+  { 0, 0, 0, 48, 80, 0, 0, 0 },  { 0, 0, 0, 40, 88, 0, 0, 0 },
+  { 0, 0, 0, 32, 96, 0, 0, 0 },  { 0, 0, 0, 24, 104, 0, 0, 0 },
+  { 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 }
+};
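+
+// Each kernel row above sums to 128 (7-bit fixed-point weights); row k gives
+// the bilinear taps (128 - 8k, 8k) for sub-pel phase k/16, and row 0 is the
+// identity kernel used at integer-pel positions.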
+
+#if USE_TEMPORALFILTER_12TAP
+DECLARE_ALIGNED(16, static const int16_t,
+                sub_pel_filters_temporalfilter_12[SUBPEL_SHIFTS][12]) = {
+  // intfilt 0.8
+  { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 },
+  { 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0 },
+  { 0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0 },
+  { -1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1 },
+  { -1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1 },
+  { -1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1 },
+  { -1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1 },
+  { -1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1 },
+  { -1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1 },
+  { -1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1 },
+  { -1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1 },
+  { -1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1 },
+  { -1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1 },
+  { -1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1 },
+  { 0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0 },
+  { 0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0 },
+};
+#endif  // USE_TEMPORALFILTER_12TAP
+
+#if CONFIG_EXT_INTERP
+DECLARE_ALIGNED(256, static const InterpKernel,
+                sub_pel_filters_8[SUBPEL_SHIFTS]) = {
+  // intfilt 0.575
+  { 0, 0, 0, 128, 0, 0, 0, 0 },        { 0, 1, -5, 126, 8, -3, 1, 0 },
+  { -1, 3, -10, 123, 18, -6, 2, -1 },  { -1, 4, -14, 118, 27, -9, 3, 0 },
+  { -1, 5, -16, 112, 37, -12, 4, -1 }, { -1, 5, -18, 105, 48, -14, 4, -1 },
+  { -1, 6, -19, 97, 58, -17, 5, -1 },  { -1, 6, -20, 88, 68, -18, 6, -1 },
+  { -1, 6, -19, 78, 78, -19, 6, -1 },  { -1, 6, -18, 68, 88, -20, 6, -1 },
+  { -1, 5, -17, 58, 97, -19, 6, -1 },  { -1, 4, -14, 48, 105, -18, 5, -1 },
+  { -1, 4, -12, 37, 112, -16, 5, -1 }, { 0, 3, -9, 27, 118, -14, 4, -1 },
+  { -1, 2, -6, 18, 123, -10, 3, -1 },  { 0, 1, -3, 8, 126, -5, 1, 0 },
+};
+
+#if CONFIG_EXT_INTRA
+DECLARE_ALIGNED(256, static const InterpKernel,
+                sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
+  // intfilt 0.8
+  { 0, 0, 0, 128, 0, 0, 0, 0 },         { -1, 2, -6, 127, 9, -4, 2, -1 },
+  { -2, 5, -12, 124, 18, -7, 4, -2 },   { -2, 7, -16, 119, 28, -11, 5, -2 },
+  { -3, 8, -19, 114, 38, -14, 7, -3 },  { -3, 9, -22, 107, 49, -17, 8, -3 },
+  { -4, 10, -23, 99, 60, -20, 10, -4 }, { -4, 11, -23, 90, 70, -22, 10, -4 },
+  { -4, 11, -23, 80, 80, -23, 11, -4 }, { -4, 10, -22, 70, 90, -23, 11, -4 },
+  { -4, 10, -20, 60, 99, -23, 10, -4 }, { -3, 8, -17, 49, 107, -22, 9, -3 },
+  { -3, 7, -14, 38, 114, -19, 8, -3 },  { -2, 5, -11, 28, 119, -16, 7, -2 },
+  { -2, 4, -7, 18, 124, -12, 5, -2 },   { -1, 2, -4, 9, 127, -6, 2, -1 },
+};
+#endif  // CONFIG_EXT_INTRA
+
+DECLARE_ALIGNED(256, static const int16_t,
+                sub_pel_filters_10sharp[SUBPEL_SHIFTS][10]) = {
+  // intfilt 0.77
+  { 0, 0, 0, 0, 128, 0, 0, 0, 0, 0 },
+  { 0, -1, 3, -6, 127, 8, -4, 2, -1, 0 },
+  { 1, -2, 5, -12, 124, 18, -7, 3, -2, 0 },
+  { 1, -3, 7, -17, 119, 28, -11, 5, -2, 1 },
+  { 1, -4, 8, -20, 114, 38, -14, 7, -3, 1 },
+  { 1, -4, 9, -22, 107, 49, -17, 8, -4, 1 },
+  { 2, -5, 10, -24, 99, 59, -20, 9, -4, 2 },
+  { 2, -5, 10, -24, 90, 70, -22, 10, -5, 2 },
+  { 2, -5, 10, -23, 80, 80, -23, 10, -5, 2 },
+  { 2, -5, 10, -22, 70, 90, -24, 10, -5, 2 },
+  { 2, -4, 9, -20, 59, 99, -24, 10, -5, 2 },
+  { 1, -4, 8, -17, 49, 107, -22, 9, -4, 1 },
+  { 1, -3, 7, -14, 38, 114, -20, 8, -4, 1 },
+  { 1, -2, 5, -11, 28, 119, -17, 7, -3, 1 },
+  { 0, -2, 3, -7, 18, 124, -12, 5, -2, 1 },
+  { 0, -1, 2, -4, 8, 127, -6, 3, -1, 0 },
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+                sub_pel_filters_8smooth2[SUBPEL_SHIFTS]) = {
+  // freqmultiplier = 0.35
+  { 0, 0, 0, 128, 0, 0, 0, 0 },     { -1, 8, 31, 47, 34, 10, 0, -1 },
+  { -1, 7, 29, 46, 36, 12, 0, -1 }, { -1, 6, 28, 46, 37, 13, 0, -1 },
+  { -1, 5, 26, 46, 38, 14, 1, -1 }, { -1, 4, 25, 45, 39, 16, 1, -1 },
+  { -1, 4, 23, 44, 41, 17, 1, -1 }, { -1, 3, 21, 44, 42, 18, 2, -1 },
+  { -1, 2, 20, 43, 43, 20, 2, -1 }, { -1, 2, 18, 42, 44, 21, 3, -1 },
+  { -1, 1, 17, 41, 44, 23, 4, -1 }, { -1, 1, 16, 39, 45, 25, 4, -1 },
+  { -1, 1, 14, 38, 46, 26, 5, -1 }, { -1, 0, 13, 37, 46, 28, 6, -1 },
+  { -1, 0, 12, 36, 46, 29, 7, -1 }, { -1, 0, 10, 34, 47, 31, 8, -1 },
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+                sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
+  // freqmultiplier = 0.75
+  { 0, 0, 0, 128, 0, 0, 0, 0 },     { 2, -10, 19, 95, 31, -11, 2, 0 },
+  { 2, -9, 14, 94, 37, -12, 2, 0 }, { 2, -8, 9, 92, 43, -12, 1, 1 },
+  { 2, -7, 5, 90, 49, -12, 1, 0 },  { 2, -5, 1, 86, 55, -12, 0, 1 },
+  { 1, -4, -2, 82, 61, -11, 0, 1 }, { 1, -3, -5, 77, 67, -9, -1, 1 },
+  { 1, -2, -7, 72, 72, -7, -2, 1 }, { 1, -1, -9, 67, 77, -5, -3, 1 },
+  { 1, 0, -11, 61, 82, -2, -4, 1 }, { 1, 0, -12, 55, 86, 1, -5, 2 },
+  { 0, 1, -12, 49, 90, 5, -7, 2 },  { 1, 1, -12, 43, 92, 9, -8, 2 },
+  { 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -11, 31, 95, 19, -10, 2 },
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                sub_pel_filters_12sharp[SUBPEL_SHIFTS][12]) = {
+  // intfilt 0.85
+  { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 },
+  { 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0 },
+  { -1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1 },
+  { -1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1 },
+  { -1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1 },
+  { -2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2 },
+  { -2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2 },
+  { -2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2 },
+  { -2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2 },
+  { -2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2 },
+  { -2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2 },
+  { -2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2 },
+  { -1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1 },
+  { -1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1 },
+  { -1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1 },
+  { 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0 },
+};
+#else   // CONFIG_EXT_INTERP
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+                sub_pel_filters_8[SUBPEL_SHIFTS]) = {
+  // Lagrangian interpolation filter
+  { 0, 0, 0, 128, 0, 0, 0, 0 },        { 0, 1, -5, 126, 8, -3, 1, 0 },
+  { -1, 3, -10, 122, 18, -6, 2, 0 },   { -1, 4, -13, 118, 27, -9, 3, -1 },
+  { -1, 4, -16, 112, 37, -11, 4, -1 }, { -1, 5, -18, 105, 48, -14, 4, -1 },
+  { -1, 5, -19, 97, 58, -16, 5, -1 },  { -1, 6, -19, 88, 68, -18, 5, -1 },
+  { -1, 6, -19, 78, 78, -19, 6, -1 },  { -1, 5, -18, 68, 88, -19, 6, -1 },
+  { -1, 5, -16, 58, 97, -19, 5, -1 },  { -1, 4, -14, 48, 105, -18, 5, -1 },
+  { -1, 4, -11, 37, 112, -16, 4, -1 }, { -1, 3, -9, 27, 118, -13, 4, -1 },
+  { 0, 2, -6, 18, 122, -10, 3, -1 },   { 0, 1, -3, 8, 126, -5, 1, 0 }
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+                sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
+  // DCT based filter
+  { 0, 0, 0, 128, 0, 0, 0, 0 },         { -1, 3, -7, 127, 8, -3, 1, 0 },
+  { -2, 5, -13, 125, 17, -6, 3, -1 },   { -3, 7, -17, 121, 27, -10, 5, -2 },
+  { -4, 9, -20, 115, 37, -13, 6, -2 },  { -4, 10, -23, 108, 48, -16, 8, -3 },
+  { -4, 10, -24, 100, 59, -19, 9, -3 }, { -4, 11, -24, 90, 70, -21, 10, -4 },
+  { -4, 11, -23, 80, 80, -23, 11, -4 }, { -4, 10, -21, 70, 90, -24, 11, -4 },
+  { -3, 9, -19, 59, 100, -24, 10, -4 }, { -3, 8, -16, 48, 108, -23, 10, -4 },
+  { -2, 6, -13, 37, 115, -20, 9, -4 },  { -2, 5, -10, 27, 121, -17, 7, -3 },
+  { -1, 3, -6, 17, 125, -13, 5, -2 },   { 0, 1, -3, 8, 127, -7, 3, -1 }
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+                sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
+  // freqmultiplier = 0.5
+  { 0, 0, 0, 128, 0, 0, 0, 0 },       { -3, -1, 32, 64, 38, 1, -3, 0 },
+  { -2, -2, 29, 63, 41, 2, -3, 0 },   { -2, -2, 26, 63, 43, 4, -4, 0 },
+  { -2, -3, 24, 62, 46, 5, -4, 0 },   { -2, -3, 21, 60, 49, 7, -4, 0 },
+  { -1, -4, 18, 59, 51, 9, -4, 0 },   { -1, -4, 16, 57, 53, 12, -4, -1 },
+  { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 12, 53, 57, 16, -4, -1 },
+  { 0, -4, 9, 51, 59, 18, -4, -1 },   { 0, -4, 7, 49, 60, 21, -3, -2 },
+  { 0, -4, 5, 46, 62, 24, -3, -2 },   { 0, -4, 4, 43, 63, 26, -2, -2 },
+  { 0, -3, 2, 41, 63, 29, -2, -2 },   { 0, -3, 1, 38, 64, 32, -1, -3 }
+};
+#endif  // CONFIG_EXT_INTERP
+
+#if CONFIG_EXT_INTRA
+const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS] = {
+  bilinear_filters,         // INTRA_FILTER_LINEAR
+  sub_pel_filters_8,        // INTRA_FILTER_8TAP
+  sub_pel_filters_8sharp,   // INTRA_FILTER_8TAP_SHARP
+  sub_pel_filters_8smooth,  // INTRA_FILTER_8TAP_SMOOTH
+};
+#endif  // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTERP
+static const InterpFilterParams
+    vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
+      { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS },
+      { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS },
+      { (const int16_t *)sub_pel_filters_10sharp, 10, SUBPEL_SHIFTS },
+      { (const int16_t *)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS },
+      { (const int16_t *)sub_pel_filters_12sharp, 12, SUBPEL_SHIFTS },
+      { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS }
+    };
+#else
+static const InterpFilterParams
+    vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
+      { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS },
+      { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS },
+      { (const int16_t *)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS },
+      { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS }
+    };
+#endif  // CONFIG_EXT_INTERP
+
+#if USE_TEMPORALFILTER_12TAP
+static const InterpFilterParams vp10_interp_temporalfilter_12tap = {
+  (const int16_t *)sub_pel_filters_temporalfilter_12, 12, SUBPEL_SHIFTS
+};
+#endif  // USE_TEMPORALFILTER_12TAP
+
+InterpFilterParams vp10_get_interp_filter_params(
+    const INTERP_FILTER interp_filter) {
+#if USE_TEMPORALFILTER_12TAP
+  if (interp_filter == TEMPORALFILTER_12TAP)
+    return vp10_interp_temporalfilter_12tap;
+#endif  // USE_TEMPORALFILTER_12TAP
+  return vp10_interp_filter_params_list[interp_filter];
+}
+
+const int16_t *vp10_get_interp_filter_kernel(
+    const INTERP_FILTER interp_filter) {
+#if USE_TEMPORALFILTER_12TAP
+  if (interp_filter == TEMPORALFILTER_12TAP)
+    return vp10_interp_temporalfilter_12tap.filter_ptr;
+#endif  // USE_TEMPORALFILTER_12TAP
+  return (const int16_t *)vp10_interp_filter_params_list[interp_filter]
+      .filter_ptr;
+}
+
+SubpelFilterCoeffs vp10_get_subpel_filter_signal_dir(const InterpFilterParams p,
+                                                     int index) {
+#if CONFIG_EXT_INTERP && HAVE_SSSE3
+  if (p.filter_ptr == (const int16_t *)sub_pel_filters_12sharp) {
+    return &sub_pel_filters_12sharp_signal_dir[index][0];
+  }
+  if (p.filter_ptr == (const int16_t *)sub_pel_filters_10sharp) {
+    return &sub_pel_filters_10sharp_signal_dir[index][0];
+  }
+#endif
+#if USE_TEMPORALFILTER_12TAP && HAVE_SSSE3
+  if (p.filter_ptr == (const int16_t *)sub_pel_filters_temporalfilter_12) {
+    return &sub_pel_filters_temporalfilter_12_signal_dir[index][0];
+  }
+#endif
+  (void)p;
+  (void)index;
+  return NULL;
+}
+
+SubpelFilterCoeffs vp10_get_subpel_filter_ver_signal_dir(
+    const InterpFilterParams p, int index) {
+#if CONFIG_EXT_INTERP && HAVE_SSSE3
+  if (p.filter_ptr == (const int16_t *)sub_pel_filters_12sharp) {
+    return &sub_pel_filters_12sharp_ver_signal_dir[index][0];
+  }
+  if (p.filter_ptr == (const int16_t *)sub_pel_filters_10sharp) {
+    return &sub_pel_filters_10sharp_ver_signal_dir[index][0];
+  }
+#endif
+#if USE_TEMPORALFILTER_12TAP && HAVE_SSSE3
+  if (p.filter_ptr == (const int16_t *)sub_pel_filters_temporalfilter_12) {
+    return &sub_pel_filters_temporalfilter_12_ver_signal_dir[index][0];
+  }
+#endif
+  (void)p;
+  (void)index;
+  return NULL;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+HbdSubpelFilterCoeffs vp10_hbd_get_subpel_filter_ver_signal_dir(
+    const InterpFilterParams p, int index) {
+#if CONFIG_EXT_INTERP && HAVE_SSE4_1
+  if (p.filter_ptr == (const int16_t *)sub_pel_filters_12sharp) {
+    return &sub_pel_filters_12sharp_highbd_ver_signal_dir[index][0];
+  }
+  if (p.filter_ptr == (const int16_t *)sub_pel_filters_10sharp) {
+    return &sub_pel_filters_10sharp_highbd_ver_signal_dir[index][0];
+  }
+#endif
+#if USE_TEMPORALFILTER_12TAP && HAVE_SSE4_1
+  if (p.filter_ptr == (const int16_t *)sub_pel_filters_temporalfilter_12) {
+    return &sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[index][0];
+  }
+#endif
+  (void)p;
+  (void)index;
+  return NULL;
+}
+#endif
diff --git a/av1/common/filter.h b/av1/common/filter.h
new file mode 100644
index 0000000..39fad23
--- /dev/null
+++ b/av1/common/filter.h
@@ -0,0 +1,132 @@
+/*
+ *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_FILTER_H_
+#define VP10_COMMON_FILTER_H_
+
+#include "./vpx_config.h"
+#include "aom/vpx_integer.h"
+#include "aom_dsp/vpx_filter.h"
+#include "aom_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define EIGHTTAP_REGULAR 0
+#define EIGHTTAP_SMOOTH 1
+#define MULTITAP_SHARP 2
+
+#if CONFIG_EXT_INTERP
+#define EIGHTTAP_SMOOTH2 3
+#define MULTITAP_SHARP2 4
+
+#define MAX_SUBPEL_TAPS 12
+
+#define SUPPORT_NONINTERPOLATING_FILTERS 0 /* turn on for experimentation */
+#define SWITCHABLE_FILTERS 5               /* Number of switchable filters */
+#else
+#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
+#endif                       // CONFIG_EXT_INTERP
+
+#define USE_TEMPORALFILTER_12TAP 1
+#if USE_TEMPORALFILTER_12TAP
+#define TEMPORALFILTER_12TAP (SWITCHABLE_FILTERS + 1)
+#endif
+
+// The codec can operate in four possible inter prediction filter modes:
+// 8-tap, 8-tap-smooth, 8-tap-sharp, and switchable among the three.
+
+#define BILINEAR (SWITCHABLE_FILTERS)
+#define SWITCHABLE (SWITCHABLE_FILTERS + 1) /* the last one */
+#if CONFIG_DUAL_FILTER
+#define SWITCHABLE_FILTER_CONTEXTS ((SWITCHABLE_FILTERS + 1) * 4)
+#define INTER_FILTER_COMP_OFFSET (SWITCHABLE_FILTERS + 1)
+#define INTER_FILTER_DIR_OFFSET ((SWITCHABLE_FILTERS + 1) * 2)
+#else
+#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
+#endif
+
+typedef uint8_t INTERP_FILTER;
+
+#if CONFIG_EXT_INTRA
+typedef enum {
+  INTRA_FILTER_LINEAR,
+  INTRA_FILTER_8TAP,
+  INTRA_FILTER_8TAP_SHARP,
+  INTRA_FILTER_8TAP_SMOOTH,
+  INTRA_FILTERS,
+} INTRA_FILTER;
+
+extern const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS];
+#endif  // CONFIG_EXT_INTRA
+
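+// Describes one interpolation filter bank: |filter_ptr| points at
+// |subpel_shifts| consecutive kernels of |taps| coefficients each.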
+typedef struct InterpFilterParams {
+  const int16_t *filter_ptr;
+  uint16_t taps;
+  uint16_t subpel_shifts;
+} InterpFilterParams;
+
+InterpFilterParams vp10_get_interp_filter_params(
+    const INTERP_FILTER interp_filter);
+
+const int16_t *vp10_get_interp_filter_kernel(const INTERP_FILTER interp_filter);
+
+static INLINE const int16_t *vp10_get_interp_filter_subpel_kernel(
+    const InterpFilterParams filter_params, const int subpel) {
+  return filter_params.filter_ptr + filter_params.taps * subpel;
+}
+
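+// A filter is considered interpolating if its phase-0 kernel passes samples
+// through unchanged, i.e. the center tap holds the full weight of 128.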
+static INLINE int vp10_is_interpolating_filter(
+    const INTERP_FILTER interp_filter) {
+  const InterpFilterParams ip = vp10_get_interp_filter_params(interp_filter);
+  return (ip.filter_ptr[ip.taps / 2 - 1] == 128);
+}
+
+#if USE_TEMPORALFILTER_12TAP
+extern const int8_t sub_pel_filters_temporalfilter_12_signal_dir[15][2][16];
+extern const int8_t sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16];
+#if CONFIG_VP9_HIGHBITDEPTH
+extern const int16_t
+    sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8];
+#endif
+#endif
+
+#if CONFIG_EXT_INTERP
+extern const int8_t sub_pel_filters_12sharp_signal_dir[15][2][16];
+extern const int8_t sub_pel_filters_10sharp_signal_dir[15][2][16];
+extern const int8_t sub_pel_filters_12sharp_ver_signal_dir[15][6][16];
+extern const int8_t sub_pel_filters_10sharp_ver_signal_dir[15][6][16];
+#if CONFIG_VP9_HIGHBITDEPTH
+extern const int16_t sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8];
+extern const int16_t sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8];
+#endif
+#endif
+
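+// Row pointers into the pre-packed coefficient tables above, as consumed by
+// the SSSE3/SSE4.1 convolution code.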
+typedef const int8_t (*SubpelFilterCoeffs)[16];
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef const int16_t (*HbdSubpelFilterCoeffs)[8];
+#endif
+
+SubpelFilterCoeffs vp10_get_subpel_filter_signal_dir(const InterpFilterParams p,
+                                                     int index);
+
+SubpelFilterCoeffs vp10_get_subpel_filter_ver_signal_dir(
+    const InterpFilterParams p, int index);
+#if CONFIG_VP9_HIGHBITDEPTH
+HbdSubpelFilterCoeffs vp10_hbd_get_subpel_filter_ver_signal_dir(
+    const InterpFilterParams p, int index);
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_FILTER_H_
diff --git a/av1/common/frame_buffers.c b/av1/common/frame_buffers.c
new file mode 100644
index 0000000..5c736a9
--- /dev/null
+++ b/av1/common/frame_buffers.c
@@ -0,0 +1,80 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/frame_buffers.h"
+#include "aom_mem/vpx_mem.h"
+
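+// Default implementations of the external frame buffer callbacks declared in
+// frame_buffers.h, presumably used when the application does not register its
+// own callbacks with the decoder.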
+int vp10_alloc_internal_frame_buffers(InternalFrameBufferList *list) {
+  assert(list != NULL);
+  vp10_free_internal_frame_buffers(list);
+
+  list->num_internal_frame_buffers =
+      VPX_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+  list->int_fb = (InternalFrameBuffer *)vpx_calloc(
+      list->num_internal_frame_buffers, sizeof(*list->int_fb));
+  return (list->int_fb == NULL);
+}
+
+void vp10_free_internal_frame_buffers(InternalFrameBufferList *list) {
+  int i;
+
+  assert(list != NULL);
+
+  for (i = 0; i < list->num_internal_frame_buffers; ++i) {
+    vpx_free(list->int_fb[i].data);
+    list->int_fb[i].data = NULL;
+  }
+  vpx_free(list->int_fb);
+  list->int_fb = NULL;
+}
+
+int vp10_get_frame_buffer(void *cb_priv, size_t min_size,
+                          vpx_codec_frame_buffer_t *fb) {
+  int i;
+  InternalFrameBufferList *const int_fb_list =
+      (InternalFrameBufferList *)cb_priv;
+  if (int_fb_list == NULL) return -1;
+
+  // Find a free frame buffer.
+  for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) {
+    if (!int_fb_list->int_fb[i].in_use) break;
+  }
+
+  if (i == int_fb_list->num_internal_frame_buffers) return -1;
+
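+  // Grow-only allocation: a buffer that is already at least |min_size| bytes
+  // long is reused as-is.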
+  if (int_fb_list->int_fb[i].size < min_size) {
+    int_fb_list->int_fb[i].data =
+        (uint8_t *)vpx_realloc(int_fb_list->int_fb[i].data, min_size);
+    if (!int_fb_list->int_fb[i].data) return -1;
+
+    // This memset is needed to fix a valgrind error caused by the C loop
+    // filter accessing uninitialized memory in the frame border. It could be
+    // removed if the border were removed entirely.
+    memset(int_fb_list->int_fb[i].data, 0, min_size);
+    int_fb_list->int_fb[i].size = min_size;
+  }
+
+  fb->data = int_fb_list->int_fb[i].data;
+  fb->size = int_fb_list->int_fb[i].size;
+  int_fb_list->int_fb[i].in_use = 1;
+
+  // Set the frame buffer's private data to point at the internal frame buffer.
+  fb->priv = &int_fb_list->int_fb[i];
+  return 0;
+}
+
+int vp10_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) {
+  InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv;
+  (void)cb_priv;
+  if (int_fb) int_fb->in_use = 0;
+  return 0;
+}
diff --git a/av1/common/frame_buffers.h b/av1/common/frame_buffers.h
new file mode 100644
index 0000000..6667132
--- /dev/null
+++ b/av1/common/frame_buffers.h
@@ -0,0 +1,53 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_FRAME_BUFFERS_H_
+#define VP10_COMMON_FRAME_BUFFERS_H_
+
+#include "aom/vpx_frame_buffer.h"
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct InternalFrameBuffer {
+  uint8_t *data;
+  size_t size;
+  int in_use;
+} InternalFrameBuffer;
+
+typedef struct InternalFrameBufferList {
+  int num_internal_frame_buffers;
+  InternalFrameBuffer *int_fb;
+} InternalFrameBufferList;
+
+// Initializes |list|. Returns 0 on success.
+int vp10_alloc_internal_frame_buffers(InternalFrameBufferList *list);
+
+// Frees any data allocated to the frame buffers.
+void vp10_free_internal_frame_buffers(InternalFrameBufferList *list);
+
+// Callback used by libaom to request an external frame buffer. |cb_priv| is
+// the callback's private data, which points to an InternalFrameBufferList.
+// |min_size| is the minimum size in bytes needed to decode the next frame.
+// |fb| is a pointer to the frame buffer to fill in. Returns 0 on success.
+int vp10_get_frame_buffer(void *cb_priv, size_t min_size,
+                          vpx_codec_frame_buffer_t *fb);
+
+// Callback used by libaom when there are no more references to the frame
+// buffer. |cb_priv| is not used. |fb| is a pointer to the frame buffer to
+// release. Returns 0.
+int vp10_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_FRAME_BUFFERS_H_
diff --git a/av1/common/idct.c b/av1/common/idct.c
new file mode 100644
index 0000000..83b44d5
--- /dev/null
+++ b/av1/common/idct.c
@@ -0,0 +1,2230 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "av1/common/blockd.h"
+#include "av1/common/enums.h"
+#include "av1/common/idct.h"
+#include "av1/common/vp10_inv_txfm2d_cfg.h"
+#include "aom_dsp/inv_txfm.h"
+#include "aom_ports/mem.h"
+
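+// Returns 1 if an extra coefficient downscaling shift is required, which is
+// the case whenever the effective square transform size is 32x32. Note that
+// the high-bitdepth and low-bitdepth branches currently return the same
+// value.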
+int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
+                 const TX_SIZE tx_size) {
+  (void)tx_type;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    return txsize_sqr_up_map[tx_size] == TX_32X32;
+  }
+#else
+  (void)xd;
+#endif
+  return txsize_sqr_up_map[tx_size] == TX_32X32;
+}
+
+#if CONFIG_EXT_TX
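+// Inverse identity (no-op) transforms: each variant only rescales its input.
+// The per-dimension gain is sqrt(n/2), i.e. sqrt(2), 2, 2*sqrt(2) and 4 for
+// n = 4, 8, 16 and 32.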
+static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  for (i = 0; i < 4; ++i)
+    output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
+}
+
+static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
+}
+
+static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  for (i = 0; i < 16; ++i)
+    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
+}
+
+static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
+}
+
+// For use in lieu of DST
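+// The top half of the output is the bottom half of the input scaled by 4;
+// the bottom half of the output is a 16-point IDCT of the sqrt(2)-scaled
+// first 16 inputs.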
+static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  tran_low_t inputhalf[16];
+  for (i = 0; i < 16; ++i) {
+    output[i] = input[16 + i] * 4;
+  }
+  // Multiply input by sqrt(2)
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
+  }
+  idct16_c(inputhalf, output + 16);
+  // Note: the overall scaling factor is 4x that of an orthogonal transform.
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
+                            int bd) {
+  int i;
+  for (i = 0; i < 4; ++i)
+    output[i] =
+        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
+}
+
+static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
+                            int bd) {
+  int i;
+  (void)bd;
+  for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
+}
+
+static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
+                             int bd) {
+  int i;
+  for (i = 0; i < 16; ++i)
+    output[i] =
+        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * 2 * Sqrt2), bd);
+}
+
+static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
+                             int bd) {
+  int i;
+  (void)bd;
+  for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
+}
+
+static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
+                                  int bd) {
+  int i;
+  tran_low_t inputhalf[16];
+  for (i = 0; i < 16; ++i) {
+    output[i] = input[16 + i] * 4;
+  }
+  // Multiply input by sqrt(2)
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] =
+        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
+  }
+  vpx_highbd_idct16_c(inputhalf, output + 16, bd);
+  // Note: the overall scaling factor is 4x that of an orthogonal transform.
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+// Inverse identity transform and add.
+static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                           int bs, int tx_type) {
+  int r, c;
+  const int shift = bs < 32 ? 3 : 2;
+  if (tx_type == IDTX) {
+    for (r = 0; r < bs; ++r) {
+      for (c = 0; c < bs; ++c)
+        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
+      dest += stride;
+      input += bs;
+    }
+  }
+}
+
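+// Repoint |dest| at its last row and negate the stride so that subsequent
+// row-major iteration walks the buffer bottom-up, flipping it vertically
+// in-place.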
+#define FLIPUD_PTR(dest, stride, size)       \
+  do {                                       \
+    (dest) = (dest) + ((size)-1) * (stride); \
+    (stride) = -(stride);                    \
+  } while (0)
+
+static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
+                               int *sstride, int tx_type, int sizey,
+                               int sizex) {
+  // Note that the transpose of src will be added to dst. In order to LR
+  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
+  // the addends, we UD flip the dst.
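+  // Concretely: a FLIPADST in the vertical dimension flips dst up-down, and
+  // a FLIPADST in the horizontal dimension flips src up-down (which becomes
+  // a left-right flip after the transpose).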
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    case IDTX:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST: break;
+    case FLIPADST_DCT:
+    case FLIPADST_ADST:
+    case V_FLIPADST:
+      // flip UD
+      FLIPUD_PTR(*dst, *dstride, sizey);
+      break;
+    case DCT_FLIPADST:
+    case ADST_FLIPADST:
+    case H_FLIPADST:
+      // flip LR
+      FLIPUD_PTR(*src, *sstride, sizex);
+      break;
+    case FLIPADST_FLIPADST:
+      // flip UD
+      FLIPUD_PTR(*dst, *dstride, sizey);
+      // flip LR
+      FLIPUD_PTR(*src, *sstride, sizex);
+      break;
+    default: assert(0); break;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  tran_low_t step[4];
+  tran_high_t temp1, temp2;
+  (void)bd;
+  // stage 1
+  temp1 = (input[3] + input[1]) * cospi_16_64;
+  temp2 = (input[3] - input[1]) * cospi_16_64;
+  step[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  temp1 = input[2] * cospi_24_64 - input[0] * cospi_8_64;
+  temp2 = input[2] * cospi_8_64 + input[0] * cospi_24_64;
+  step[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+  // stage 2
+  output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd);
+  output[1] = HIGHBD_WRAPLOW(-step[1] - step[2], bd);
+  output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd);
+  output[3] = HIGHBD_WRAPLOW(step[3] - step[0], bd);
+}
+
+void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  tran_low_t step1[8], step2[8];
+  tran_high_t temp1, temp2;
+  (void)bd;
+  // stage 1
+  step1[0] = input[7];
+  step1[2] = input[3];
+  step1[1] = input[5];
+  step1[3] = input[1];
+  temp1 = input[6] * cospi_28_64 - input[0] * cospi_4_64;
+  temp2 = input[6] * cospi_4_64 + input[0] * cospi_28_64;
+  step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  temp1 = input[2] * cospi_12_64 - input[4] * cospi_20_64;
+  temp2 = input[2] * cospi_20_64 + input[4] * cospi_12_64;
+  step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+  // stage 2
+  temp1 = (step1[0] + step1[2]) * cospi_16_64;
+  temp2 = (step1[0] - step1[2]) * cospi_16_64;
+  step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
+  temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
+  step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+  step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+  step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+  step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
+
+  // stage 3
+  step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
+  step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
+  step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
+  step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
+  step1[4] = step2[4];
+  temp1 = (step2[6] - step2[5]) * cospi_16_64;
+  temp2 = (step2[5] + step2[6]) * cospi_16_64;
+  step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  step1[7] = step2[7];
+
+  // stage 4
+  output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+  output[1] = HIGHBD_WRAPLOW(-step1[1] - step1[6], bd);
+  output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+  output[3] = HIGHBD_WRAPLOW(-step1[3] - step1[4], bd);
+  output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+  output[5] = HIGHBD_WRAPLOW(-step1[2] + step1[5], bd);
+  output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+  output[7] = HIGHBD_WRAPLOW(-step1[0] + step1[7], bd);
+}
+
+void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  tran_low_t step1[16], step2[16];
+  tran_high_t temp1, temp2;
+  (void)bd;
+
+  // stage 1
+  step1[0] = input[15];
+  step1[1] = input[7];
+  step1[2] = input[11];
+  step1[3] = input[3];
+  step1[4] = input[13];
+  step1[5] = input[5];
+  step1[6] = input[9];
+  step1[7] = input[1];
+  step1[8] = input[14];
+  step1[9] = input[6];
+  step1[10] = input[10];
+  step1[11] = input[2];
+  step1[12] = input[12];
+  step1[13] = input[4];
+  step1[14] = input[8];
+  step1[15] = input[0];
+
+  // stage 2
+  step2[0] = step1[0];
+  step2[1] = step1[1];
+  step2[2] = step1[2];
+  step2[3] = step1[3];
+  step2[4] = step1[4];
+  step2[5] = step1[5];
+  step2[6] = step1[6];
+  step2[7] = step1[7];
+
+  temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+  temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+  step2[8] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[15] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+  temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+  temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+  step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+  temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+  temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+  step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+  temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+  temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+  step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+  // stage 3
+  step1[0] = step2[0];
+  step1[1] = step2[1];
+  step1[2] = step2[2];
+  step1[3] = step2[3];
+
+  temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+  temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+  step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+  temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+  step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+  step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd);
+  step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd);
+  step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd);
+  step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd);
+  step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd);
+  step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd);
+  step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd);
+  step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd);
+
+  // stage 4
+  temp1 = (step1[0] + step1[1]) * cospi_16_64;
+  temp2 = (step1[0] - step1[1]) * cospi_16_64;
+  step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+  temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+  step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+  step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+  step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+  step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
+
+  step2[8] = step1[8];
+  step2[15] = step1[15];
+  temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+  temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+  step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+  temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+  step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  step2[11] = step1[11];
+  step2[12] = step1[12];
+
+  // stage 5
+  step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
+  step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
+  step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
+  step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
+  step1[4] = step2[4];
+  temp1 = (step2[6] - step2[5]) * cospi_16_64;
+  temp2 = (step2[5] + step2[6]) * cospi_16_64;
+  step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  step1[7] = step2[7];
+
+  step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd);
+  step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd);
+  step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd);
+  step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd);
+  step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd);
+  step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd);
+  step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd);
+  step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd);
+
+  // stage 6
+  step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+  step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
+  step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+  step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
+  step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+  step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
+  step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+  step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
+  step2[8] = step1[8];
+  step2[9] = step1[9];
+  temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+  temp2 = (step1[10] + step1[13]) * cospi_16_64;
+  step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+  temp2 = (step1[11] + step1[12]) * cospi_16_64;
+  step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+  step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+  step2[14] = step1[14];
+  step2[15] = step1[15];
+
+  // stage 7
+  output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd);
+  output[1] = HIGHBD_WRAPLOW(-step2[1] - step2[14], bd);
+  output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd);
+  output[3] = HIGHBD_WRAPLOW(-step2[3] - step2[12], bd);
+  output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd);
+  output[5] = HIGHBD_WRAPLOW(-step2[5] - step2[10], bd);
+  output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd);
+  output[7] = HIGHBD_WRAPLOW(-step2[7] - step2[8], bd);
+  output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd);
+  output[9] = HIGHBD_WRAPLOW(-step2[6] + step2[9], bd);
+  output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd);
+  output[11] = HIGHBD_WRAPLOW(-step2[4] + step2[11], bd);
+  output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd);
+  output[13] = HIGHBD_WRAPLOW(-step2[2] + step2[13], bd);
+  output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd);
+  output[15] = HIGHBD_WRAPLOW(-step2[0] + step2[15], bd);
+}
+
+static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
+                                  int stride, int bs, int tx_type, int bd) {
+  int r, c;
+  const int shift = bs < 32 ? 3 : 2;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  if (tx_type == IDTX) {
+    for (r = 0; r < bs; ++r) {
+      for (c = 0; c < bs; ++c)
+        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
+      dest += stride;
+      input += bs;
+    }
+  }
+}
+
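+// 16-bit-pixel counterpart of maybe_flip_strides above.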
+static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
+                                 int *sstride, int tx_type, int sizey,
+                                 int sizex) {
+  // Note that the transpose of src will be added to dst. In order to LR
+  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
+  // the addends, we UD flip the dst.
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    case IDTX:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST: break;
+    case FLIPADST_DCT:
+    case FLIPADST_ADST:
+    case V_FLIPADST:
+      // flip UD
+      FLIPUD_PTR(*dst, *dstride, sizey);
+      break;
+    case DCT_FLIPADST:
+    case ADST_FLIPADST:
+    case H_FLIPADST:
+      // flip LR
+      FLIPUD_PTR(*src, *sstride, sizex);
+      break;
+    case FLIPADST_FLIPADST:
+      // flip UD
+      FLIPUD_PTR(*dst, *dstride, sizey);
+      // flip LR
+      FLIPUD_PTR(*src, *sstride, sizex);
+      break;
+    default: assert(0); break;
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_EXT_TX
+
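+// 2-D inverse hybrid transforms. The pattern below is shared by all block
+// sizes: inverse-transform the rows, transpose, inverse-transform the
+// columns, then add the result to |dest| with a final rounding shift that
+// removes the transforms' scaling.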
+void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                          int tx_type) {
+  static const transform_2d IHT_4[] = {
+    { idct4_c, idct4_c },    // DCT_DCT
+    { iadst4_c, idct4_c },   // ADST_DCT
+    { idct4_c, iadst4_c },   // DCT_ADST
+    { iadst4_c, iadst4_c },  // ADST_ADST
+#if CONFIG_EXT_TX
+    { iadst4_c, idct4_c },   // FLIPADST_DCT
+    { idct4_c, iadst4_c },   // DCT_FLIPADST
+    { iadst4_c, iadst4_c },  // FLIPADST_FLIPADST
+    { iadst4_c, iadst4_c },  // ADST_FLIPADST
+    { iadst4_c, iadst4_c },  // FLIPADST_ADST
+    { iidtx4_c, iidtx4_c },  // IDTX
+    { idct4_c, iidtx4_c },   // V_DCT
+    { iidtx4_c, idct4_c },   // H_DCT
+    { iadst4_c, iidtx4_c },  // V_ADST
+    { iidtx4_c, iadst4_c },  // H_ADST
+    { iadst4_c, iidtx4_c },  // V_FLIPADST
+    { iidtx4_c, iadst4_c },  // H_FLIPADST
+#endif                       // CONFIG_EXT_TX
+  };
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t out[4][4];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 4;
+
+  // inverse transform row vectors
+  for (i = 0; i < 4; ++i) {
+    IHT_4[tx_type].rows(input, out[i]);
+    input += 4;
+  }
+
+  // transpose
+  for (i = 1; i < 4; i++) {
+    for (j = 0; j < i; j++) {
+      tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 4; ++i) {
+    IHT_4[tx_type].cols(out[i], out[i]);
+  }
+
+#if CONFIG_EXT_TX
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
+#endif
+
+  // Sum with the destination
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < 4; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
+    }
+  }
+}
+
+#if CONFIG_EXT_TX
+void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                          int tx_type) {
+  static const transform_2d IHT_4x8[] = {
+    { idct8_c, idct4_c },    // DCT_DCT
+    { iadst8_c, idct4_c },   // ADST_DCT
+    { idct8_c, iadst4_c },   // DCT_ADST
+    { iadst8_c, iadst4_c },  // ADST_ADST
+    { iadst8_c, idct4_c },   // FLIPADST_DCT
+    { idct8_c, iadst4_c },   // DCT_FLIPADST
+    { iadst8_c, iadst4_c },  // FLIPADST_FLIPADST
+    { iadst8_c, iadst4_c },  // ADST_FLIPADST
+    { iadst8_c, iadst4_c },  // FLIPADST_ADST
+    { iidtx8_c, iidtx4_c },  // IDTX
+    { idct8_c, iidtx4_c },   // V_DCT
+    { iidtx8_c, idct4_c },   // H_DCT
+    { iadst8_c, iidtx4_c },  // V_ADST
+    { iidtx8_c, iadst4_c },  // H_ADST
+    { iadst8_c, iidtx4_c },  // V_FLIPADST
+    { iidtx8_c, iadst4_c },  // H_FLIPADST
+  };
+
+  const int n = 4;
+  const int n2 = 8;
+  int i, j;
+  tran_low_t out[4][8], outtmp[4];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n2;
+
+  // inverse transform row vectors and transpose
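+  // (the extra Sqrt2 factor makes the combined row/column gain of the
+  // rectangular transform a power of two, so the final rounding shift of 5
+  // removes it exactly)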
+  for (i = 0; i < n2; ++i) {
+    IHT_4x8[tx_type].rows(input, outtmp);
+    for (j = 0; j < n; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += n;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n; ++i) {
+    IHT_4x8[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+  // Sum with the destination
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+    }
+  }
+}
+
+void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                          int tx_type) {
+  static const transform_2d IHT_8x4[] = {
+    { idct4_c, idct8_c },    // DCT_DCT
+    { iadst4_c, idct8_c },   // ADST_DCT
+    { idct4_c, iadst8_c },   // DCT_ADST
+    { iadst4_c, iadst8_c },  // ADST_ADST
+    { iadst4_c, idct8_c },   // FLIPADST_DCT
+    { idct4_c, iadst8_c },   // DCT_FLIPADST
+    { iadst4_c, iadst8_c },  // FLIPADST_FLIPADST
+    { iadst4_c, iadst8_c },  // ADST_FLIPADST
+    { iadst4_c, iadst8_c },  // FLIPADST_ADST
+    { iidtx4_c, iidtx8_c },  // IDTX
+    { idct4_c, iidtx8_c },   // V_DCT
+    { iidtx4_c, idct8_c },   // H_DCT
+    { iadst4_c, iidtx8_c },  // V_ADST
+    { iidtx4_c, iadst8_c },  // H_ADST
+    { iadst4_c, iidtx8_c },  // V_FLIPADST
+    { iidtx4_c, iadst8_c },  // H_FLIPADST
+  };
+  const int n = 4;
+  const int n2 = 8;
+
+  int i, j;
+  tran_low_t out[8][4], outtmp[8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n;
+
+  // inverse transform row vectors and transpose
+  for (i = 0; i < n; ++i) {
+    IHT_8x4[tx_type].rows(input, outtmp);
+    for (j = 0; j < n2; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += n2;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n2; ++i) {
+    IHT_8x4[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+  // Sum with the destination
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+    }
+  }
+}
+
+void vp10_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                            int tx_type) {
+  static const transform_2d IHT_8x16[] = {
+    { idct16_c, idct8_c },    // DCT_DCT
+    { iadst16_c, idct8_c },   // ADST_DCT
+    { idct16_c, iadst8_c },   // DCT_ADST
+    { iadst16_c, iadst8_c },  // ADST_ADST
+    { iadst16_c, idct8_c },   // FLIPADST_DCT
+    { idct16_c, iadst8_c },   // DCT_FLIPADST
+    { iadst16_c, iadst8_c },  // FLIPADST_FLIPADST
+    { iadst16_c, iadst8_c },  // ADST_FLIPADST
+    { iadst16_c, iadst8_c },  // FLIPADST_ADST
+    { iidtx16_c, iidtx8_c },  // IDTX
+    { idct16_c, iidtx8_c },   // V_DCT
+    { iidtx16_c, idct8_c },   // H_DCT
+    { iadst16_c, iidtx8_c },  // V_ADST
+    { iidtx16_c, iadst8_c },  // H_ADST
+    { iadst16_c, iidtx8_c },  // V_FLIPADST
+    { iidtx16_c, iadst8_c },  // H_FLIPADST
+  };
+
+  const int n = 8;
+  const int n2 = 16;
+  int i, j;
+  tran_low_t out[8][16], outtmp[8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n2;
+
+  // inverse transform row vectors and transpose
+  for (i = 0; i < n2; ++i) {
+    IHT_8x16[tx_type].rows(input, outtmp);
+    for (j = 0; j < n; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += n;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n; ++i) {
+    IHT_8x16[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+  // Sum with the destination
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+
+void vp10_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                            int tx_type) {
+  static const transform_2d IHT_16x8[] = {
+    { idct8_c, idct16_c },    // DCT_DCT
+    { iadst8_c, idct16_c },   // ADST_DCT
+    { idct8_c, iadst16_c },   // DCT_ADST
+    { iadst8_c, iadst16_c },  // ADST_ADST
+    { iadst8_c, idct16_c },   // FLIPADST_DCT
+    { idct8_c, iadst16_c },   // DCT_FLIPADST
+    { iadst8_c, iadst16_c },  // FLIPADST_FLIPADST
+    { iadst8_c, iadst16_c },  // ADST_FLIPADST
+    { iadst8_c, iadst16_c },  // FLIPADST_ADST
+    { iidtx8_c, iidtx16_c },  // IDTX
+    { idct8_c, iidtx16_c },   // V_DCT
+    { iidtx8_c, idct16_c },   // H_DCT
+    { iadst8_c, iidtx16_c },  // V_ADST
+    { iidtx8_c, iadst16_c },  // H_ADST
+    { iadst8_c, iidtx16_c },  // V_FLIPADST
+    { iidtx8_c, iadst16_c },  // H_FLIPADST
+  };
+  const int n = 8;
+  const int n2 = 16;
+
+  int i, j;
+  tran_low_t out[16][8], outtmp[16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n;
+
+  // inverse transform row vectors and transpose
+  for (i = 0; i < n; ++i) {
+    IHT_16x8[tx_type].rows(input, outtmp);
+    for (j = 0; j < n2; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += n2;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n2; ++i) {
+    IHT_16x8[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+  // Sum with the destination
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+
+void vp10_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                             int tx_type) {
+  static const transform_2d IHT_16x32[] = {
+    { idct32_c, idct16_c },         // DCT_DCT
+    { ihalfright32_c, idct16_c },   // ADST_DCT
+    { idct32_c, iadst16_c },        // DCT_ADST
+    { ihalfright32_c, iadst16_c },  // ADST_ADST
+    { ihalfright32_c, idct16_c },   // FLIPADST_DCT
+    { idct32_c, iadst16_c },        // DCT_FLIPADST
+    { ihalfright32_c, iadst16_c },  // FLIPADST_FLIPADST
+    { ihalfright32_c, iadst16_c },  // ADST_FLIPADST
+    { ihalfright32_c, iadst16_c },  // FLIPADST_ADST
+    { iidtx32_c, iidtx16_c },       // IDTX
+    { idct32_c, iidtx16_c },        // V_DCT
+    { iidtx32_c, idct16_c },        // H_DCT
+    { ihalfright32_c, iidtx16_c },  // V_ADST
+    { iidtx32_c, iadst16_c },       // H_ADST
+    { ihalfright32_c, iidtx16_c },  // V_FLIPADST
+    { iidtx32_c, iadst16_c },       // H_FLIPADST
+  };
+
+  const int n = 16;
+  const int n2 = 32;
+  int i, j;
+  tran_low_t out[16][32], outtmp[16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n2;
+
+  // inverse transform row vectors and transpose
+  for (i = 0; i < n2; ++i) {
+    IHT_16x32[tx_type].rows(input, outtmp);
+    for (j = 0; j < n; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += n;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n; ++i) {
+    IHT_16x32[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+  // Sum with the destination
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+
+void vp10_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                             int tx_type) {
+  static const transform_2d IHT_32x16[] = {
+    { idct16_c, idct32_c },         // DCT_DCT
+    { iadst16_c, idct32_c },        // ADST_DCT
+    { idct16_c, ihalfright32_c },   // DCT_ADST
+    { iadst16_c, ihalfright32_c },  // ADST_ADST
+    { iadst16_c, idct32_c },        // FLIPADST_DCT
+    { idct16_c, ihalfright32_c },   // DCT_FLIPADST
+    { iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
+    { iadst16_c, ihalfright32_c },  // ADST_FLIPADST
+    { iadst16_c, ihalfright32_c },  // FLIPADST_ADST
+    { iidtx16_c, iidtx32_c },       // IDTX
+    { idct16_c, iidtx32_c },        // V_DCT
+    { iidtx16_c, idct32_c },        // H_DCT
+    { iadst16_c, iidtx32_c },       // V_ADST
+    { iidtx16_c, ihalfright32_c },  // H_ADST
+    { iadst16_c, iidtx32_c },       // V_FLIPADST
+    { iidtx16_c, ihalfright32_c },  // H_FLIPADST
+  };
+  const int n = 16;
+  const int n2 = 32;
+
+  int i, j;
+  tran_low_t out[32][16], outtmp[32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n;
+
+  // inverse transform row vectors and transpose
+  for (i = 0; i < n; ++i) {
+    IHT_32x16[tx_type].rows(input, outtmp);
+    for (j = 0; j < n2; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += n2;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n2; ++i) {
+    IHT_32x16[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+  // Sum with the destination
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+#endif  // CONFIG_EXT_TX
+
+void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                          int tx_type) {
+  static const transform_2d IHT_8[] = {
+    { idct8_c, idct8_c },    // DCT_DCT
+    { iadst8_c, idct8_c },   // ADST_DCT
+    { idct8_c, iadst8_c },   // DCT_ADST
+    { iadst8_c, iadst8_c },  // ADST_ADST
+#if CONFIG_EXT_TX
+    { iadst8_c, idct8_c },   // FLIPADST_DCT
+    { idct8_c, iadst8_c },   // DCT_FLIPADST
+    { iadst8_c, iadst8_c },  // FLIPADST_FLIPADST
+    { iadst8_c, iadst8_c },  // ADST_FLIPADST
+    { iadst8_c, iadst8_c },  // FLIPADST_ADST
+    { iidtx8_c, iidtx8_c },  // IDTX
+    { idct8_c, iidtx8_c },   // V_DCT
+    { iidtx8_c, idct8_c },   // H_DCT
+    { iadst8_c, iidtx8_c },  // V_ADST
+    { iidtx8_c, iadst8_c },  // H_ADST
+    { iadst8_c, iidtx8_c },  // V_FLIPADST
+    { iidtx8_c, iadst8_c },  // H_FLIPADST
+#endif                       // CONFIG_EXT_TX
+  };
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t out[8][8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 8;
+
+  // inverse transform row vectors
+  for (i = 0; i < 8; ++i) {
+    IHT_8[tx_type].rows(input, out[i]);
+    input += 8;
+  }
+
+  // transpose
+  for (i = 1; i < 8; i++) {
+    for (j = 0; j < i; j++) {
+      tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 8; ++i) {
+    IHT_8[tx_type].cols(out[i], out[i]);
+  }
+
+#if CONFIG_EXT_TX
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
+#endif
+
+  // Sum with the destination
+  for (i = 0; i < 8; ++i) {
+    for (j = 0; j < 8; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+    }
+  }
+}
+
+void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                             int tx_type) {
+  static const transform_2d IHT_16[] = {
+    { idct16_c, idct16_c },    // DCT_DCT
+    { iadst16_c, idct16_c },   // ADST_DCT
+    { idct16_c, iadst16_c },   // DCT_ADST
+    { iadst16_c, iadst16_c },  // ADST_ADST
+#if CONFIG_EXT_TX
+    { iadst16_c, idct16_c },   // FLIPADST_DCT
+    { idct16_c, iadst16_c },   // DCT_FLIPADST
+    { iadst16_c, iadst16_c },  // FLIPADST_FLIPADST
+    { iadst16_c, iadst16_c },  // ADST_FLIPADST
+    { iadst16_c, iadst16_c },  // FLIPADST_ADST
+    { iidtx16_c, iidtx16_c },  // IDTX
+    { idct16_c, iidtx16_c },   // V_DCT
+    { iidtx16_c, idct16_c },   // H_DCT
+    { iadst16_c, iidtx16_c },  // V_ADST
+    { iidtx16_c, iadst16_c },  // H_ADST
+    { iadst16_c, iidtx16_c },  // V_FLIPADST
+    { iidtx16_c, iadst16_c },  // H_FLIPADST
+#endif                         // CONFIG_EXT_TX
+  };
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t out[16][16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 16;
+
+  // inverse transform row vectors
+  for (i = 0; i < 16; ++i) {
+    IHT_16[tx_type].rows(input, out[i]);
+    input += 16;
+  }
+
+  // transpose
+  for (i = 1; i < 16; i++) {
+    for (j = 0; j < i; j++) {
+      tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 16; ++i) {
+    IHT_16[tx_type].cols(out[i], out[i]);
+  }
+
+#if CONFIG_EXT_TX
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
+#endif
+
+  // Sum with the destination
+  for (i = 0; i < 16; ++i) {
+    for (j = 0; j < 16; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+
+#if CONFIG_EXT_TX
+void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
+                              int stride, int tx_type) {
+  static const transform_2d IHT_32[] = {
+    { idct32_c, idct32_c },              // DCT_DCT
+    { ihalfright32_c, idct32_c },        // ADST_DCT
+    { idct32_c, ihalfright32_c },        // DCT_ADST
+    { ihalfright32_c, ihalfright32_c },  // ADST_ADST
+    { ihalfright32_c, idct32_c },        // FLIPADST_DCT
+    { idct32_c, ihalfright32_c },        // DCT_FLIPADST
+    { ihalfright32_c, ihalfright32_c },  // FLIPADST_FLIPADST
+    { ihalfright32_c, ihalfright32_c },  // ADST_FLIPADST
+    { ihalfright32_c, ihalfright32_c },  // FLIPADST_ADST
+    { iidtx32_c, iidtx32_c },            // IDTX
+    { idct32_c, iidtx32_c },             // V_DCT
+    { iidtx32_c, idct32_c },             // H_DCT
+    { ihalfright32_c, iidtx32_c },       // V_ADST
+    { iidtx32_c, ihalfright32_c },       // H_ADST
+    { ihalfright32_c, iidtx32_c },       // V_FLIPADST
+    { iidtx32_c, ihalfright32_c },       // H_FLIPADST
+  };
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t out[32][32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 32;
+
+  // inverse transform row vectors
+  for (i = 0; i < 32; ++i) {
+    IHT_32[tx_type].rows(input, out[i]);
+    input += 32;
+  }
+
+  // transpose
+  for (i = 1; i < 32; i++) {
+    for (j = 0; j < i; j++) {
+      tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 32; ++i) {
+    IHT_32[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
+
+  // Sum with the destination
+  for (i = 0; i < 32; ++i) {
+    for (j = 0; j < 32; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+#endif  // CONFIG_EXT_TX
+
+// idct
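+// The wrappers below pick a reduced-complexity IDCT based on eob (the index
+// of the last non-zero coefficient plus one): eob == 1 means only the DC
+// coefficient is present.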
+void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                      int eob) {
+  if (eob > 1)
+    vpx_idct4x4_16_add(input, dest, stride);
+  else
+    vpx_idct4x4_1_add(input, dest, stride);
+}
+
+void vp10_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                      int eob) {
+  if (eob > 1)
+    vpx_iwht4x4_16_add(input, dest, stride);
+  else
+    vpx_iwht4x4_1_add(input, dest, stride);
+}
+
+void vp10_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
+                      int eob) {
+  // If only the DC coefficient is present, input[0] already holds the
+  // reconstructed value and needs no dequantization; the DC coefficient is
+  // counted in eob, so eob >= 1 in that case.
+
+  // The calculation can be simplified when there are few non-zero dct
+  // coefficients. Use eob to decide which path to take.
+  // TODO(yunqingwang): The eob == 1 case is also handled in
+  // vp10_short_idct8x8_c. Combine that code with this.
+  if (eob == 1)
+    // DC only DCT coefficient
+    vpx_idct8x8_1_add(input, dest, stride);
+  else if (eob <= 12)
+    vpx_idct8x8_12_add(input, dest, stride);
+  else
+    vpx_idct8x8_64_add(input, dest, stride);
+}
+
+void vp10_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
+                        int eob) {
+  /* The calculation can be simplified if there are not many non-zero dct
+   * coefficients. Use eobs to separate different cases. */
+  if (eob == 1) /* DC only DCT coefficient. */
+    vpx_idct16x16_1_add(input, dest, stride);
+  else if (eob <= 10)
+    vpx_idct16x16_10_add(input, dest, stride);
+  else
+    vpx_idct16x16_256_add(input, dest, stride);
+}
+
+void vp10_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
+                        int eob) {
+  if (eob == 1)
+    vpx_idct32x32_1_add(input, dest, stride);
+  else if (eob <= 34)
+    // non-zero coeff only in upper-left 8x8
+    vpx_idct32x32_34_add(input, dest, stride);
+  else
+    vpx_idct32x32_1024_add(input, dest, stride);
+}
+
+void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type, int lossless) {
+  if (lossless) {
+    assert(tx_type == DCT_DCT);
+    vp10_iwht4x4_add(input, dest, stride, eob);
+    return;
+  }
+
+  switch (tx_type) {
+    case DCT_DCT: vp10_idct4x4_add(input, dest, stride, eob); break;
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST: vp10_iht4x4_16_add(input, dest, stride, tx_type); break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST: vp10_iht4x4_16_add(input, dest, stride, tx_type); break;
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      // Use C version since DST only exists in C code
+      vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
+      break;
+    case IDTX: inv_idtx_add_c(input, dest, stride, 4, tx_type); break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+#if CONFIG_EXT_TX
+void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type) {
+  (void)eob;
+  vp10_iht4x8_32_add(input, dest, stride, tx_type);
+}
+
+void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type) {
+  (void)eob;
+  vp10_iht8x4_32_add(input, dest, stride, tx_type);
+}
+
+void vp10_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest, int stride,
+                            int eob, TX_TYPE tx_type) {
+  (void)eob;
+  vp10_iht8x16_128_add(input, dest, stride, tx_type);
+}
+
+void vp10_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest, int stride,
+                            int eob, TX_TYPE tx_type) {
+  (void)eob;
+  vp10_iht16x8_128_add(input, dest, stride, tx_type);
+}
+
+void vp10_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, TX_TYPE tx_type) {
+  (void)eob;
+  vp10_iht16x32_512_add(input, dest, stride, tx_type);
+}
+
+void vp10_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, TX_TYPE tx_type) {
+  (void)eob;
+  vp10_iht32x16_512_add(input, dest, stride, tx_type);
+}
+#endif  // CONFIG_EXT_TX
+
+void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type) {
+  switch (tx_type) {
+    case DCT_DCT: vp10_idct8x8_add(input, dest, stride, eob); break;
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST: vp10_iht8x8_64_add(input, dest, stride, tx_type); break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST: vp10_iht8x8_64_add(input, dest, stride, tx_type); break;
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      // Use C version since DST only exists in C code
+      vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
+      break;
+    case IDTX: inv_idtx_add_c(input, dest, stride, 8, tx_type); break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, TX_TYPE tx_type) {
+  switch (tx_type) {
+    case DCT_DCT: vp10_idct16x16_add(input, dest, stride, eob); break;
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST: vp10_iht16x16_256_add(input, dest, stride, tx_type); break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+      vp10_iht16x16_256_add(input, dest, stride, tx_type);
+      break;
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      // Use C version since DST only exists in C code
+      vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
+      break;
+    case IDTX: inv_idtx_add_c(input, dest, stride, 16, tx_type); break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, TX_TYPE tx_type) {
+  switch (tx_type) {
+    case DCT_DCT: vp10_idct32x32_add(input, dest, stride, eob); break;
+#if CONFIG_EXT_TX
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
+      break;
+    case IDTX: inv_idtx_add_c(input, dest, stride, 32, tx_type); break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
+                                 int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_4[] = {
+    { vpx_highbd_idct4_c, vpx_highbd_idct4_c },    // DCT_DCT
+    { vpx_highbd_iadst4_c, vpx_highbd_idct4_c },   // ADST_DCT
+    { vpx_highbd_idct4_c, vpx_highbd_iadst4_c },   // DCT_ADST
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c },  // ADST_ADST
+#if CONFIG_EXT_TX
+    { vpx_highbd_iadst4_c, vpx_highbd_idct4_c },   // FLIPADST_DCT
+    { vpx_highbd_idct4_c, vpx_highbd_iadst4_c },   // DCT_FLIPADST
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c },  // FLIPADST_FLIPADST
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c },  // ADST_FLIPADST
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c },  // FLIPADST_ADST
+    { highbd_iidtx4_c, highbd_iidtx4_c },          // IDTX
+    { vpx_highbd_idct4_c, highbd_iidtx4_c },       // V_DCT
+    { highbd_iidtx4_c, vpx_highbd_idct4_c },       // H_DCT
+    { vpx_highbd_iadst4_c, highbd_iidtx4_c },      // V_ADST
+    { highbd_iidtx4_c, vpx_highbd_iadst4_c },      // H_ADST
+    { vpx_highbd_iadst4_c, highbd_iidtx4_c },      // V_FLIPADST
+    { highbd_iidtx4_c, vpx_highbd_iadst4_c },      // H_FLIPADST
+#endif                                             // CONFIG_EXT_TX
+  };
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t out[4][4];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 4;
+
+  // inverse transform row vectors
+  for (i = 0; i < 4; ++i) {
+    HIGH_IHT_4[tx_type].rows(input, out[i], bd);
+    input += 4;
+  }
+
+  // transpose
+  for (i = 1; i < 4; i++) {
+    for (j = 0; j < i; j++) {
+      tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 4; ++i) {
+    HIGH_IHT_4[tx_type].cols(out[i], out[i], bd);
+  }
+
+#if CONFIG_EXT_TX
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
+#endif
+
+  // Sum with the destination
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < 4; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] =
+          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4), bd);
+    }
+  }
+}
+
+#if CONFIG_EXT_TX
+void vp10_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8,
+                                 int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_4x8[] = {
+    { vpx_highbd_idct8_c, vpx_highbd_idct4_c },    // DCT_DCT
+    { vpx_highbd_iadst8_c, vpx_highbd_idct4_c },   // ADST_DCT
+    { vpx_highbd_idct8_c, vpx_highbd_iadst4_c },   // DCT_ADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c },  // ADST_ADST
+    { vpx_highbd_iadst8_c, vpx_highbd_idct4_c },   // FLIPADST_DCT
+    { vpx_highbd_idct8_c, vpx_highbd_iadst4_c },   // DCT_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c },  // FLIPADST_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c },  // ADST_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c },  // FLIPADST_ADST
+    { highbd_iidtx8_c, highbd_iidtx4_c },          // IDTX
+    { vpx_highbd_idct8_c, highbd_iidtx4_c },       // V_DCT
+    { highbd_iidtx8_c, vpx_highbd_idct4_c },       // H_DCT
+    { vpx_highbd_iadst8_c, highbd_iidtx4_c },      // V_ADST
+    { highbd_iidtx8_c, vpx_highbd_iadst4_c },      // H_ADST
+    { vpx_highbd_iadst8_c, highbd_iidtx4_c },      // V_FLIPADST
+    { highbd_iidtx8_c, vpx_highbd_iadst4_c },      // H_FLIPADST
+  };
+  const int n = 4;
+  const int n2 = 8;
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t out[4][8], outtmp[4];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n2;
+
+  // inverse transform row vectors, and transpose
+  for (i = 0; i < n2; ++i) {
+    HIGH_IHT_4x8[tx_type].rows(input, outtmp, bd);
+    for (j = 0; j < n; ++j) {
+      out[j][i] =
+          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    }
+    input += n;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n; ++i) {
+    HIGH_IHT_4x8[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+  // Sum with the destination
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] =
+          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
+    }
+  }
+}
+
+void vp10_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8,
+                                 int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_8x4[] = {
+    { vpx_highbd_idct4_c, vpx_highbd_idct8_c },    // DCT_DCT
+    { vpx_highbd_iadst4_c, vpx_highbd_idct8_c },   // ADST_DCT
+    { vpx_highbd_idct4_c, vpx_highbd_iadst8_c },   // DCT_ADST
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c },  // ADST_ADST
+    { vpx_highbd_iadst4_c, vpx_highbd_idct8_c },   // FLIPADST_DCT
+    { vpx_highbd_idct4_c, vpx_highbd_iadst8_c },   // DCT_FLIPADST
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c },  // FLIPADST_FLIPADST
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c },  // ADST_FLIPADST
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c },  // FLIPADST_ADST
+    { highbd_iidtx4_c, highbd_iidtx8_c },          // IDTX
+    { vpx_highbd_idct4_c, highbd_iidtx8_c },       // V_DCT
+    { highbd_iidtx4_c, vpx_highbd_idct8_c },       // H_DCT
+    { vpx_highbd_iadst4_c, highbd_iidtx8_c },      // V_ADST
+    { highbd_iidtx4_c, vpx_highbd_iadst8_c },      // H_ADST
+    { vpx_highbd_iadst4_c, highbd_iidtx8_c },      // V_FLIPADST
+    { highbd_iidtx4_c, vpx_highbd_iadst8_c },      // H_FLIPADST
+  };
+  const int n = 4;
+  const int n2 = 8;
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t out[8][4], outtmp[8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n;
+
+  // inverse transform row vectors, and transpose
+  for (i = 0; i < n; ++i) {
+    HIGH_IHT_8x4[tx_type].rows(input, outtmp, bd);
+    for (j = 0; j < n2; ++j) {
+      out[j][i] =
+          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    }
+    input += n2;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n2; ++i) {
+    HIGH_IHT_8x4[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+  // Sum with the destination
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] =
+          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
+    }
+  }
+}
+
+void vp10_highbd_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest8,
+                                   int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_8x16[] = {
+    { vpx_highbd_idct16_c, vpx_highbd_idct8_c },    // DCT_DCT
+    { vpx_highbd_iadst16_c, vpx_highbd_idct8_c },   // ADST_DCT
+    { vpx_highbd_idct16_c, vpx_highbd_iadst8_c },   // DCT_ADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c },  // ADST_ADST
+    { vpx_highbd_iadst16_c, vpx_highbd_idct8_c },   // FLIPADST_DCT
+    { vpx_highbd_idct16_c, vpx_highbd_iadst8_c },   // DCT_FLIPADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c },  // FLIPADST_FLIPADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c },  // ADST_FLIPADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c },  // FLIPADST_ADST
+    { highbd_iidtx16_c, highbd_iidtx8_c },          // IDTX
+    { vpx_highbd_idct16_c, highbd_iidtx8_c },       // V_DCT
+    { highbd_iidtx16_c, vpx_highbd_idct8_c },       // H_DCT
+    { vpx_highbd_iadst16_c, highbd_iidtx8_c },      // V_ADST
+    { highbd_iidtx16_c, vpx_highbd_iadst8_c },      // H_ADST
+    { vpx_highbd_iadst16_c, highbd_iidtx8_c },      // V_FLIPADST
+    { highbd_iidtx16_c, vpx_highbd_iadst8_c },      // H_FLIPADST
+  };
+  const int n = 8;
+  const int n2 = 16;
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t out[8][16], outtmp[8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n2;
+
+  // inverse transform row vectors, and transpose
+  for (i = 0; i < n2; ++i) {
+    HIGH_IHT_8x16[tx_type].rows(input, outtmp, bd);
+    for (j = 0; j < n; ++j)
+      out[j][i] =
+          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    input += n;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n; ++i) {
+    HIGH_IHT_8x16[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+  // Sum with the destination
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] =
+          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest8,
+                                   int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_16x8[] = {
+    { vpx_highbd_idct8_c, vpx_highbd_idct16_c },    // DCT_DCT
+    { vpx_highbd_iadst8_c, vpx_highbd_idct16_c },   // ADST_DCT
+    { vpx_highbd_idct8_c, vpx_highbd_iadst16_c },   // DCT_ADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c },  // ADST_ADST
+    { vpx_highbd_iadst8_c, vpx_highbd_idct16_c },   // FLIPADST_DCT
+    { vpx_highbd_idct8_c, vpx_highbd_iadst16_c },   // DCT_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c },  // FLIPADST_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c },  // ADST_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c },  // FLIPADST_ADST
+    { highbd_iidtx8_c, highbd_iidtx16_c },          // IDTX
+    { vpx_highbd_idct8_c, highbd_iidtx16_c },       // V_DCT
+    { highbd_iidtx8_c, vpx_highbd_idct16_c },       // H_DCT
+    { vpx_highbd_iadst8_c, highbd_iidtx16_c },      // V_ADST
+    { highbd_iidtx8_c, vpx_highbd_iadst16_c },      // H_ADST
+    { vpx_highbd_iadst8_c, highbd_iidtx16_c },      // V_FLIPADST
+    { highbd_iidtx8_c, vpx_highbd_iadst16_c },      // H_FLIPADST
+  };
+  const int n = 8;
+  const int n2 = 16;
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t out[16][8], outtmp[16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n;
+
+  // inverse transform row vectors, and transpose
+  for (i = 0; i < n; ++i) {
+    HIGH_IHT_16x8[tx_type].rows(input, outtmp, bd);
+    for (j = 0; j < n2; ++j)
+      out[j][i] =
+          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    input += n2;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n2; ++i) {
+    HIGH_IHT_16x8[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+  // Sum with the destination
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] =
+          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest8,
+                                    int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_16x32[] = {
+    { vpx_highbd_idct32_c, vpx_highbd_idct16_c },     // DCT_DCT
+    { highbd_ihalfright32_c, vpx_highbd_idct16_c },   // ADST_DCT
+    { vpx_highbd_idct32_c, vpx_highbd_iadst16_c },    // DCT_ADST
+    { highbd_ihalfright32_c, vpx_highbd_iadst16_c },  // ADST_ADST
+    { highbd_ihalfright32_c, vpx_highbd_idct16_c },   // FLIPADST_DCT
+    { vpx_highbd_idct32_c, vpx_highbd_iadst16_c },    // DCT_FLIPADST
+    { highbd_ihalfright32_c, vpx_highbd_iadst16_c },  // FLIPADST_FLIPADST
+    { highbd_ihalfright32_c, vpx_highbd_iadst16_c },  // ADST_FLIPADST
+    { highbd_ihalfright32_c, vpx_highbd_iadst16_c },  // FLIPADST_ADST
+    { highbd_iidtx32_c, highbd_iidtx16_c },           // IDTX
+    { vpx_highbd_idct32_c, highbd_iidtx16_c },        // V_DCT
+    { highbd_iidtx32_c, vpx_highbd_idct16_c },        // H_DCT
+    { highbd_ihalfright32_c, highbd_iidtx16_c },      // V_ADST
+    { highbd_iidtx32_c, vpx_highbd_iadst16_c },       // H_ADST
+    { highbd_ihalfright32_c, highbd_iidtx16_c },      // V_FLIPADST
+    { highbd_iidtx32_c, vpx_highbd_iadst16_c },       // H_FLIPADST
+  };
+  const int n = 16;
+  const int n2 = 32;
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t out[16][32], outtmp[16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n2;
+
+  // inverse transform row vectors, and transpose
+  for (i = 0; i < n2; ++i) {
+    HIGH_IHT_16x32[tx_type].rows(input, outtmp, bd);
+    for (j = 0; j < n; ++j)
+      out[j][i] =
+          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    input += n;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n; ++i) {
+    HIGH_IHT_16x32[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+  // Sum with the destination
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] =
+          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest8,
+                                    int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_32x16[] = {
+    { vpx_highbd_idct16_c, vpx_highbd_idct32_c },     // DCT_DCT
+    { vpx_highbd_iadst16_c, vpx_highbd_idct32_c },    // ADST_DCT
+    { vpx_highbd_idct16_c, highbd_ihalfright32_c },   // DCT_ADST
+    { vpx_highbd_iadst16_c, highbd_ihalfright32_c },  // ADST_ADST
+    { vpx_highbd_iadst16_c, vpx_highbd_idct32_c },    // FLIPADST_DCT
+    { vpx_highbd_idct16_c, highbd_ihalfright32_c },   // DCT_FLIPADST
+    { vpx_highbd_iadst16_c, highbd_ihalfright32_c },  // FLIPADST_FLIPADST
+    { vpx_highbd_iadst16_c, highbd_ihalfright32_c },  // ADST_FLIPADST
+    { vpx_highbd_iadst16_c, highbd_ihalfright32_c },  // FLIPADST_ADST
+    { highbd_iidtx16_c, highbd_iidtx32_c },           // IDTX
+    { vpx_highbd_idct16_c, highbd_iidtx32_c },        // V_DCT
+    { highbd_iidtx16_c, vpx_highbd_idct32_c },        // H_DCT
+    { vpx_highbd_iadst16_c, highbd_iidtx32_c },       // V_ADST
+    { highbd_iidtx16_c, highbd_ihalfright32_c },      // H_ADST
+    { vpx_highbd_iadst16_c, highbd_iidtx32_c },       // V_FLIPADST
+    { highbd_iidtx16_c, highbd_ihalfright32_c },      // H_FLIPADST
+  };
+  const int n = 16;
+  const int n2 = 32;
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t out[32][16], outtmp[32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n;
+
+  // inverse transform row vectors, and transpose
+  for (i = 0; i < n; ++i) {
+    HIGH_IHT_32x16[tx_type].rows(input, outtmp, bd);
+    for (j = 0; j < n2; ++j)
+      out[j][i] =
+          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    input += n2;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n2; ++i) {
+    HIGH_IHT_32x16[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+  // Sum with the destination
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] =
+          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+#endif  // CONFIG_EXT_TX
+
+void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
+                                 int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_8[] = {
+    { vpx_highbd_idct8_c, vpx_highbd_idct8_c },    // DCT_DCT
+    { vpx_highbd_iadst8_c, vpx_highbd_idct8_c },   // ADST_DCT
+    { vpx_highbd_idct8_c, vpx_highbd_iadst8_c },   // DCT_ADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c },  // ADST_ADST
+#if CONFIG_EXT_TX
+    { vpx_highbd_iadst8_c, vpx_highbd_idct8_c },   // FLIPADST_DCT
+    { vpx_highbd_idct8_c, vpx_highbd_iadst8_c },   // DCT_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c },  // FLIPADST_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c },  // ADST_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c },  // FLIPADST_ADST
+    { highbd_iidtx8_c, highbd_iidtx8_c },          // IDTX
+    { vpx_highbd_idct8_c, highbd_iidtx8_c },       // V_DCT
+    { highbd_iidtx8_c, vpx_highbd_idct8_c },       // H_DCT
+    { vpx_highbd_iadst8_c, highbd_iidtx8_c },      // V_ADST
+    { highbd_iidtx8_c, vpx_highbd_iadst8_c },      // H_ADST
+    { vpx_highbd_iadst8_c, highbd_iidtx8_c },      // V_FLIPADST
+    { highbd_iidtx8_c, vpx_highbd_iadst8_c },      // H_FLIPADST
+#endif                                             // CONFIG_EXT_TX
+  };
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t out[8][8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 8;
+
+  // inverse transform row vectors
+  for (i = 0; i < 8; ++i) {
+    HIGH_IHT_8[tx_type].rows(input, out[i], bd);
+    input += 8;
+  }
+
+  // transpose
+  for (i = 1; i < 8; i++) {
+    for (j = 0; j < i; j++) {
+      tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 8; ++i) {
+    HIGH_IHT_8[tx_type].cols(out[i], out[i], bd);
+  }
+
+#if CONFIG_EXT_TX
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
+#endif
+
+  // Sum with the destination
+  for (i = 0; i < 8; ++i) {
+    for (j = 0; j < 8; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] =
+          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
+    }
+  }
+}
+
+void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
+                                    int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_16[] = {
+    { vpx_highbd_idct16_c, vpx_highbd_idct16_c },    // DCT_DCT
+    { vpx_highbd_iadst16_c, vpx_highbd_idct16_c },   // ADST_DCT
+    { vpx_highbd_idct16_c, vpx_highbd_iadst16_c },   // DCT_ADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c },  // ADST_ADST
+#if CONFIG_EXT_TX
+    { vpx_highbd_iadst16_c, vpx_highbd_idct16_c },   // FLIPADST_DCT
+    { vpx_highbd_idct16_c, vpx_highbd_iadst16_c },   // DCT_FLIPADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c },  // FLIPADST_FLIPADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c },  // ADST_FLIPADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c },  // FLIPADST_ADST
+    { highbd_iidtx16_c, highbd_iidtx16_c },          // IDTX
+    { vpx_highbd_idct16_c, highbd_iidtx16_c },       // V_DCT
+    { highbd_iidtx16_c, vpx_highbd_idct16_c },       // H_DCT
+    { vpx_highbd_iadst16_c, highbd_iidtx16_c },      // V_ADST
+    { highbd_iidtx16_c, vpx_highbd_iadst16_c },      // H_ADST
+    { vpx_highbd_iadst16_c, highbd_iidtx16_c },      // V_FLIPADST
+    { highbd_iidtx16_c, vpx_highbd_iadst16_c },      // H_FLIPADST
+#endif                                               // CONFIG_EXT_TX
+  };
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t out[16][16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 16;
+
+  // inverse transform row vectors
+  for (i = 0; i < 16; ++i) {
+    HIGH_IHT_16[tx_type].rows(input, out[i], bd);
+    input += 16;
+  }
+
+  // transpose
+  for (i = 1; i < 16; i++) {
+    for (j = 0; j < i; j++) {
+      tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 16; ++i) {
+    HIGH_IHT_16[tx_type].cols(out[i], out[i], bd);
+  }
+
+#if CONFIG_EXT_TX
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
+#endif
+
+  // Sum with the destination
+  for (i = 0; i < 16; ++i) {
+    for (j = 0; j < 16; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] =
+          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+
+#if CONFIG_EXT_TX
+void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
+                                     int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_32[] = {
+    { vpx_highbd_idct32_c, vpx_highbd_idct32_c },      // DCT_DCT
+    { highbd_ihalfright32_c, vpx_highbd_idct32_c },    // ADST_DCT
+    { vpx_highbd_idct32_c, highbd_ihalfright32_c },    // DCT_ADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // ADST_ADST
+    { highbd_ihalfright32_c, vpx_highbd_idct32_c },    // FLIPADST_DCT
+    { vpx_highbd_idct32_c, highbd_ihalfright32_c },    // DCT_FLIPADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // FLIPADST_FLIPADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // ADST_FLIPADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // FLIPADST_ADST
+    { highbd_iidtx32_c, highbd_iidtx32_c },            // IDTX
+    { vpx_highbd_idct32_c, highbd_iidtx32_c },         // V_DCT
+    { highbd_iidtx32_c, vpx_highbd_idct32_c },         // H_DCT
+    { highbd_ihalfright32_c, highbd_iidtx32_c },       // V_ADST
+    { highbd_iidtx32_c, highbd_ihalfright32_c },       // H_ADST
+    { highbd_ihalfright32_c, highbd_iidtx32_c },       // V_FLIPADST
+    { highbd_iidtx32_c, highbd_ihalfright32_c },       // H_FLIPADST
+  };
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t out[32][32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 32;
+
+  // inverse transform row vectors
+  for (i = 0; i < 32; ++i) {
+    HIGH_IHT_32[tx_type].rows(input, out[i], bd);
+    input += 32;
+  }
+
+  // transpose
+  for (i = 1; i < 32; i++) {
+    for (j = 0; j < i; j++) {
+      tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 32; ++i) {
+    HIGH_IHT_32[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
+
+  // Sum with the destination
+  for (i = 0; i < 32; ++i) {
+    for (j = 0; j < 32; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] =
+          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+#endif  // CONFIG_EXT_TX
+
+// idct
+void vp10_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, int bd) {
+  if (eob > 1)
+    vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
+  else
+    vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
+}
+
+void vp10_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, int bd) {
+  if (eob > 1)
+    vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
+  else
+    vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
+}
+
+void vp10_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, int bd) {
+  // If eob is 1, input[0] is the reconstructed DC value and needs no
+  // dequantization; the DC coefficient is counted in eobs, so eobs >= 1.
+
+  // The calculation can be simplified if there are not many non-zero dct
+  // coefficients. Use eobs to decide what to do: the 10-coefficient path
+  // assumes the non-zero values lie within the first 10 scan positions.
+  // TODO(yunqingwang): The "eobs = 1" case is also handled in
+  // vp10_short_idct8x8_c. Combine that with the code here.
+  // DC only DCT coefficient
+  if (eob == 1) {
+    vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
+  } else if (eob <= 10) {
+    vpx_highbd_idct8x8_10_add(input, dest, stride, bd);
+  } else {
+    vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
+  }
+}
+
+void vp10_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
+                               int stride, int eob, int bd) {
+  // The calculation can be simplified if there are not many non-zero dct
+  // coefficients. Use eobs to separate different cases.
+  // DC only DCT coefficient.
+  if (eob == 1) {
+    vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
+  } else if (eob <= 10) {
+    vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
+  } else {
+    vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
+  }
+}
+
+void vp10_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
+                               int stride, int eob, int bd) {
+  // eob == 1 is the DC-only case; eob <= 34 implies the non-zero
+  // coefficients lie only in the upper-left 8x8 region.
+  if (eob == 1) {
+    vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
+  } else if (eob <= 34) {
+    vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
+  } else {
+    vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
+  }
+}
+
+void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd, TX_TYPE tx_type,
+                                  int lossless) {
+  if (lossless) {
+    assert(tx_type == DCT_DCT);
+    vp10_highbd_iwht4x4_add(input, dest, stride, eob, bd);
+    return;
+  }
+
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
+                              bd);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+      vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
+                              bd);
+      break;
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      // Use the C version since these transform types only have C
+      // implementations.
+      vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
+      break;
+    case IDTX:
+      highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+#if CONFIG_EXT_TX
+void vp10_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd,
+                                  TX_TYPE tx_type) {
+  (void)eob;
+  vp10_highbd_iht4x8_32_add_c(input, dest, stride, tx_type, bd);
+}
+
+void vp10_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd,
+                                  TX_TYPE tx_type) {
+  (void)eob;
+  vp10_highbd_iht8x4_32_add_c(input, dest, stride, tx_type, bd);
+}
+
+void vp10_highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
+                                   int stride, int eob, int bd,
+                                   TX_TYPE tx_type) {
+  (void)eob;
+  vp10_highbd_iht8x16_128_add_c(input, dest, stride, tx_type, bd);
+}
+
+void vp10_highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
+                                   int stride, int eob, int bd,
+                                   TX_TYPE tx_type) {
+  (void)eob;
+  vp10_highbd_iht16x8_128_add_c(input, dest, stride, tx_type, bd);
+}
+
+void vp10_highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type) {
+  (void)eob;
+  vp10_highbd_iht16x32_512_add_c(input, dest, stride, tx_type, bd);
+}
+
+void vp10_highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type) {
+  (void)eob;
+  vp10_highbd_iht32x16_512_add_c(input, dest, stride, tx_type, bd);
+}
+#endif  // CONFIG_EXT_TX
+
+void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd,
+                                  TX_TYPE tx_type) {
+  (void)eob;
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
+                              bd);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+      vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
+                              bd);
+      break;
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      // Use the C version since these transform types only have C
+      // implementations.
+      vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
+      break;
+    case IDTX:
+      highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type) {
+  (void)eob;
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
+                                tx_type, bd);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+      vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
+                                tx_type, bd);
+      break;
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      // Use the C version since these transform types only have C
+      // implementations.
+      vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
+      break;
+    case IDTX:
+      highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type) {
+  (void)eob;
+  switch (tx_type) {
+    case DCT_DCT:
+      vp10_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
+                                DCT_DCT, bd);
+      break;
+#if CONFIG_EXT_TX
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
+      break;
+    case IDTX:
+      highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+                  INV_TXFM_PARAM *inv_txfm_param) {
+  const TX_TYPE tx_type = inv_txfm_param->tx_type;
+  const TX_SIZE tx_size = inv_txfm_param->tx_size;
+  const int eob = inv_txfm_param->eob;
+  const int lossless = inv_txfm_param->lossless;
+
+  switch (tx_size) {
+    case TX_32X32:
+      vp10_inv_txfm_add_32x32(input, dest, stride, eob, tx_type);
+      break;
+    case TX_16X16:
+      vp10_inv_txfm_add_16x16(input, dest, stride, eob, tx_type);
+      break;
+    case TX_8X8:
+      vp10_inv_txfm_add_8x8(input, dest, stride, eob, tx_type);
+      break;
+#if CONFIG_EXT_TX
+    case TX_4X8:
+      vp10_inv_txfm_add_4x8(input, dest, stride, eob, tx_type);
+      break;
+    case TX_8X4:
+      vp10_inv_txfm_add_8x4(input, dest, stride, eob, tx_type);
+      break;
+    case TX_8X16:
+      vp10_inv_txfm_add_8x16(input, dest, stride, eob, tx_type);
+      break;
+    case TX_16X8:
+      vp10_inv_txfm_add_16x8(input, dest, stride, eob, tx_type);
+      break;
+    case TX_16X32:
+      vp10_inv_txfm_add_16x32(input, dest, stride, eob, tx_type);
+      break;
+    case TX_32X16:
+      vp10_inv_txfm_add_32x16(input, dest, stride, eob, tx_type);
+      break;
+#endif  // CONFIG_EXT_TX
+    case TX_4X4:
+      // This is like vp10_short_idct4x4, but with a special case around
+      // eob <= 1 that is significant (not just an optimization) for the
+      // lossless case.
+      vp10_inv_txfm_add_4x4(input, dest, stride, eob, tx_type, lossless);
+      break;
+    default: assert(0 && "Invalid transform size"); break;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+                         INV_TXFM_PARAM *inv_txfm_param) {
+  const TX_TYPE tx_type = inv_txfm_param->tx_type;
+  const TX_SIZE tx_size = inv_txfm_param->tx_size;
+  const int eob = inv_txfm_param->eob;
+  const int bd = inv_txfm_param->bd;
+  const int lossless = inv_txfm_param->lossless;
+
+  switch (tx_size) {
+    case TX_32X32:
+      vp10_highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, tx_type);
+      break;
+    case TX_16X16:
+      vp10_highbd_inv_txfm_add_16x16(input, dest, stride, eob, bd, tx_type);
+      break;
+    case TX_8X8:
+      vp10_highbd_inv_txfm_add_8x8(input, dest, stride, eob, bd, tx_type);
+      break;
+#if CONFIG_EXT_TX
+    case TX_4X8:
+      vp10_highbd_inv_txfm_add_4x8(input, dest, stride, eob, bd, tx_type);
+      break;
+    case TX_8X4:
+      vp10_highbd_inv_txfm_add_8x4(input, dest, stride, eob, bd, tx_type);
+      break;
+    case TX_8X16:
+      vp10_highbd_inv_txfm_add_8x16(input, dest, stride, eob, bd, tx_type);
+      break;
+    case TX_16X8:
+      vp10_highbd_inv_txfm_add_16x8(input, dest, stride, eob, bd, tx_type);
+      break;
+    case TX_16X32:
+      vp10_highbd_inv_txfm_add_16x32(input, dest, stride, eob, bd, tx_type);
+      break;
+    case TX_32X16:
+      vp10_highbd_inv_txfm_add_32x16(input, dest, stride, eob, bd, tx_type);
+      break;
+#endif  // CONFIG_EXT_TX
+    case TX_4X4:
+      // This is like vp10_short_idct4x4, but with a special case around
+      // eob <= 1 that is significant (not just an optimization) for the
+      // lossless case.
+      vp10_highbd_inv_txfm_add_4x4(input, dest, stride, eob, bd, tx_type,
+                                   lossless);
+      break;
+    default: assert(0 && "Invalid transform size"); break;
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/idct.h b/av1/common/idct.h
new file mode 100644
index 0000000..9b3be62
--- /dev/null
+++ b/av1/common/idct.h
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_IDCT_H_
+#define VP10_COMMON_IDCT_H_
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "av1/common/blockd.h"
+#include "av1/common/common.h"
+#include "av1/common/enums.h"
+#include "aom_dsp/inv_txfm.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct INV_TXFM_PARAM {
+  TX_TYPE tx_type;
+  TX_SIZE tx_size;
+  int eob;
+  int lossless;
+#if CONFIG_VP9_HIGHBITDEPTH
+  int bd;
+#endif
+} INV_TXFM_PARAM;
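+
+// A minimal usage sketch (illustrative only; names like dqcoeff/dst are
+// hypothetical caller-side variables):
+//
+//   INV_TXFM_PARAM param;
+//   param.tx_type = DCT_DCT;
+//   param.tx_size = TX_8X8;
+//   param.eob = eob;        // number of coded coefficients
+//   param.lossless = 0;
+//   inv_txfm_add(dqcoeff, dst, dst_stride, &param);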
+
+typedef void (*transform_1d)(const tran_low_t *, tran_low_t *);
+
+typedef struct {
+  transform_1d cols, rows;  // vertical and horizontal
+} transform_2d;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef void (*highbd_transform_1d)(const tran_low_t *, tran_low_t *, int bd);
+
+typedef struct {
+  highbd_transform_1d cols, rows;  // vertical and horizontal
+} highbd_transform_2d;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#define MAX_TX_SCALE 1
+int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
+                 const TX_SIZE tx_size);
+
+void vp10_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                      int eob);
+void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                      int eob);
+void vp10_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
+                      int eob);
+void vp10_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
+                        int eob);
+void vp10_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
+                        int eob);
+
+void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type, int lossless);
+#if CONFIG_EXT_TX
+void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type);
+void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type);
+#endif  // CONFIG_EXT_TX
+void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type);
+void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, TX_TYPE tx_type);
+void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, TX_TYPE tx_type);
+void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+                  INV_TXFM_PARAM *inv_txfm_param);
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, int bd);
+void vp10_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, int bd);
+void vp10_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, int bd);
+void vp10_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
+                               int stride, int eob, int bd);
+void vp10_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
+                               int stride, int eob, int bd);
+void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd, TX_TYPE tx_type,
+                                  int lossless);
+#if CONFIG_EXT_TX
+void vp10_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd, TX_TYPE tx_type);
+void vp10_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd, TX_TYPE tx_type);
+#endif  // CONFIG_EXT_TX
+void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd, TX_TYPE tx_type);
+void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type);
+void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type);
+void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+                         INV_TXFM_PARAM *inv_txfm_param);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_IDCT_H_
diff --git a/av1/common/intra_filters.h b/av1/common/intra_filters.h
new file mode 100644
index 0000000..021fb8e
--- /dev/null
+++ b/av1/common/intra_filters.h
@@ -0,0 +1,67 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_INTRA_FILTERS_H_
+#define VP10_COMMON_INTRA_FILTERS_H_
+
+#define FILTER_INTRA_PREC_BITS (10)
+
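+// Each entry below is a set of 4 filter taps in Q(FILTER_INTRA_PREC_BITS)
+// fixed point (i.e. scaled by 1 << 10), indexed as
+// [tx_size][intra_mode][tap].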
+static const int filter_intra_taps_4[TX_SIZES][INTRA_MODES][4] = {
+  {
+      { 735, 881, -537, -54 },
+      { 1005, 519, -488, -11 },
+      { 383, 990, -343, -6 },
+      { 442, 805, -542, 319 },
+      { 658, 616, -133, -116 },
+      { 875, 442, -141, -151 },
+      { 386, 741, -23, -80 },
+      { 390, 1027, -446, 51 },
+      { 679, 606, -523, 262 },
+      { 903, 922, -778, -23 },
+  },
+  {
+      { 648, 803, -444, 16 },
+      { 972, 620, -576, 7 },
+      { 561, 967, -499, -5 },
+      { 585, 762, -468, 144 },
+      { 596, 619, -182, -9 },
+      { 895, 459, -176, -153 },
+      { 557, 722, -126, -129 },
+      { 601, 839, -523, 105 },
+      { 562, 709, -499, 251 },
+      { 803, 872, -695, 43 },
+  },
+  {
+      { 423, 728, -347, 111 },
+      { 963, 685, -665, 23 },
+      { 281, 1024, -480, 216 },
+      { 640, 596, -437, 78 },
+      { 429, 669, -259, 99 },
+      { 740, 646, -415, 23 },
+      { 568, 771, -346, 40 },
+      { 404, 833, -486, 209 },
+      { 398, 712, -423, 307 },
+      { 939, 935, -887, 17 },
+  },
+  {
+      { 477, 737, -393, 150 },
+      { 881, 630, -546, 67 },
+      { 506, 984, -443, -20 },
+      { 114, 459, -270, 528 },
+      { 433, 528, 14, 3 },
+      { 837, 470, -301, -30 },
+      { 181, 777, 89, -107 },
+      { -29, 716, -232, 259 },
+      { 589, 646, -495, 255 },
+      { 740, 884, -728, 77 },
+  },
+};
+
+#endif  // VP10_COMMON_INTRA_FILTERS_H_
diff --git a/av1/common/loopfilter.c b/av1/common/loopfilter.c
new file mode 100644
index 0000000..e4636a5
--- /dev/null
+++ b/av1/common/loopfilter.c
@@ -0,0 +1,1697 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "av1/common/loopfilter.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/restoration.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/seg_common.h"
+
+// 64 bit masks for left transform size. Each 1 represents an 8x8 position
+// whose left border should be loop filtered.
+//
+// In the case of TX_16X16 (low-order byte first) we end up with a mask that
+// looks like this:
+//
+//    10101010
+//    10101010
+//    10101010
+//    10101010
+//    10101010
+//    10101010
+//    10101010
+//    10101010
+//
+// A loopfilter should be applied to every other 8-pixel column horizontally.
+static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
+  0xffffffffffffffffULL,  // TX_4X4
+  0xffffffffffffffffULL,  // TX_8x8
+  0x5555555555555555ULL,  // TX_16x16
+  0x1111111111111111ULL,  // TX_32x32
+};
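+
+// Worked example: bit (r * 8 + c) of a mask corresponds to the 8x8 block at
+// row r, column c of the 64x64 area (low-order byte = top row). For
+// TX_16X16 the left mask 0x5555... thus selects the even columns -- one
+// filtered edge per 16-pixel-wide transform.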
+
+// 64 bit masks for above transform size. Each 1 represents an 8x8 position
+// whose top border should be loop filtered.
+//
+// In the case of TX_32X32 (low-order byte first) we end up with a mask that
+// looks like this:
+//
+//    11111111
+//    00000000
+//    00000000
+//    00000000
+//    11111111
+//    00000000
+//    00000000
+//    00000000
+//
+// A loopfilter should be applied every fourth 8-pixel row vertically.
+static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
+  0xffffffffffffffffULL,  // TX_4X4
+  0xffffffffffffffffULL,  // TX_8x8
+  0x00ff00ff00ff00ffULL,  // TX_16x16
+  0x000000ff000000ffULL,  // TX_32x32
+};
+
+// 64 bit masks for prediction sizes (left). Each 1 represents a position at
+// the left border of an 8x8 block. These are aligned to the rightmost
+// appropriate bit, and then shifted into place.
+//
+// In the case of BLOCK_16X32 (low-order byte first) we end up with a mask
+// that looks like this:
+//
+//  10000000
+//  10000000
+//  10000000
+//  10000000
+//  00000000
+//  00000000
+//  00000000
+//  00000000
+static const uint64_t left_prediction_mask[BLOCK_SIZES] = {
+  0x0000000000000001ULL,  // BLOCK_4X4,
+  0x0000000000000001ULL,  // BLOCK_4X8,
+  0x0000000000000001ULL,  // BLOCK_8X4,
+  0x0000000000000001ULL,  // BLOCK_8X8,
+  0x0000000000000101ULL,  // BLOCK_8X16,
+  0x0000000000000001ULL,  // BLOCK_16X8,
+  0x0000000000000101ULL,  // BLOCK_16X16,
+  0x0000000001010101ULL,  // BLOCK_16X32,
+  0x0000000000000101ULL,  // BLOCK_32X16,
+  0x0000000001010101ULL,  // BLOCK_32X32,
+  0x0101010101010101ULL,  // BLOCK_32X64,
+  0x0000000001010101ULL,  // BLOCK_64X32,
+  0x0101010101010101ULL,  // BLOCK_64X64
+};
+
+// 64 bit mask to shift and set for each prediction size.
+static const uint64_t above_prediction_mask[BLOCK_SIZES] = {
+  0x0000000000000001ULL,  // BLOCK_4X4
+  0x0000000000000001ULL,  // BLOCK_4X8
+  0x0000000000000001ULL,  // BLOCK_8X4
+  0x0000000000000001ULL,  // BLOCK_8X8
+  0x0000000000000001ULL,  // BLOCK_8X16,
+  0x0000000000000003ULL,  // BLOCK_16X8
+  0x0000000000000003ULL,  // BLOCK_16X16
+  0x0000000000000003ULL,  // BLOCK_16X32,
+  0x000000000000000fULL,  // BLOCK_32X16,
+  0x000000000000000fULL,  // BLOCK_32X32,
+  0x000000000000000fULL,  // BLOCK_32X64,
+  0x00000000000000ffULL,  // BLOCK_64X32,
+  0x00000000000000ffULL,  // BLOCK_64X64
+};
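+// For example, above_prediction_mask[BLOCK_16X8] = 0x0003 sets the two bits
+// of the top row spanned by a 16-wide prediction block.
+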
+// 64 bit mask to shift and set for each prediction size. A bit is set for
+// each 8x8 block that would be covered by the given block size placed at
+// the top left of the 64x64 block.
+static const uint64_t size_mask[BLOCK_SIZES] = {
+  0x0000000000000001ULL,  // BLOCK_4X4
+  0x0000000000000001ULL,  // BLOCK_4X8
+  0x0000000000000001ULL,  // BLOCK_8X4
+  0x0000000000000001ULL,  // BLOCK_8X8
+  0x0000000000000101ULL,  // BLOCK_8X16,
+  0x0000000000000003ULL,  // BLOCK_16X8
+  0x0000000000000303ULL,  // BLOCK_16X16
+  0x0000000003030303ULL,  // BLOCK_16X32,
+  0x0000000000000f0fULL,  // BLOCK_32X16,
+  0x000000000f0f0f0fULL,  // BLOCK_32X32,
+  0x0f0f0f0f0f0f0f0fULL,  // BLOCK_32X64,
+  0x00000000ffffffffULL,  // BLOCK_64X32,
+  0xffffffffffffffffULL,  // BLOCK_64X64
+};
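+
+// For example, size_mask[BLOCK_16X16] = 0x0303 sets bits (0,0), (0,1),
+// (1,0) and (1,1): the four 8x8 units covered by a 16x16 block at the top
+// left.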
+
+// These are used for masking the left and above borders.
+static const uint64_t left_border = 0x1111111111111111ULL;
+static const uint64_t above_border = 0x000000ff000000ffULL;
+
+// 16 bit masks for uv transform sizes.
+static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
+  0xffff,  // TX_4X4
+  0xffff,  // TX_8x8
+  0x5555,  // TX_16x16
+  0x1111,  // TX_32x32
+};
+
+static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
+  0xffff,  // TX_4X4
+  0xffff,  // TX_8x8
+  0x0f0f,  // TX_16x16
+  0x000f,  // TX_32x32
+};
+
+// 16 bit left mask to shift and set for each uv prediction size.
+static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = {
+  0x0001,  // BLOCK_4X4,
+  0x0001,  // BLOCK_4X8,
+  0x0001,  // BLOCK_8X4,
+  0x0001,  // BLOCK_8X8,
+  0x0001,  // BLOCK_8X16,
+  0x0001,  // BLOCK_16X8,
+  0x0001,  // BLOCK_16X16,
+  0x0011,  // BLOCK_16X32,
+  0x0001,  // BLOCK_32X16,
+  0x0011,  // BLOCK_32X32,
+  0x1111,  // BLOCK_32X64
+  0x0011,  // BLOCK_64X32,
+  0x1111,  // BLOCK_64X64
+};
+// 16 bit above mask to shift and set for each uv prediction size.
+static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = {
+  0x0001,  // BLOCK_4X4
+  0x0001,  // BLOCK_4X8
+  0x0001,  // BLOCK_8X4
+  0x0001,  // BLOCK_8X8
+  0x0001,  // BLOCK_8X16,
+  0x0001,  // BLOCK_16X8
+  0x0001,  // BLOCK_16X16
+  0x0001,  // BLOCK_16X32,
+  0x0003,  // BLOCK_32X16,
+  0x0003,  // BLOCK_32X32,
+  0x0003,  // BLOCK_32X64,
+  0x000f,  // BLOCK_64X32,
+  0x000f,  // BLOCK_64X64
+};
+
+// 16 bit mask to shift and set for each uv prediction size
+static const uint16_t size_mask_uv[BLOCK_SIZES] = {
+  0x0001,  // BLOCK_4X4
+  0x0001,  // BLOCK_4X8
+  0x0001,  // BLOCK_8X4
+  0x0001,  // BLOCK_8X8
+  0x0001,  // BLOCK_8X16,
+  0x0001,  // BLOCK_16X8
+  0x0001,  // BLOCK_16X16
+  0x0011,  // BLOCK_16X32,
+  0x0003,  // BLOCK_32X16,
+  0x0033,  // BLOCK_32X32,
+  0x3333,  // BLOCK_32X64,
+  0x00ff,  // BLOCK_64X32,
+  0xffff,  // BLOCK_64X64
+};
+static const uint16_t left_border_uv = 0x1111;
+static const uint16_t above_border_uv = 0x000f;
+
+static const int mode_lf_lut[MB_MODE_COUNT] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // INTRA_MODES
+  1, 1, 0, 1                     // INTER_MODES (ZEROMV == 0)
+#if CONFIG_EXT_INTER
+  ,
+  1,                            // NEWFROMNEARMV mode
+  1, 1, 1, 1, 1, 1, 1, 1, 0, 1  // INTER_COMPOUND_MODES (ZERO_ZEROMV == 0)
+#endif                          // CONFIG_EXT_INTER
+};
+
+static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
+  int lvl;
+
+  // For each possible value for the loop filter fill out limits
+  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
+    // Set loop filter parameters that control sharpness.
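+    // Illustrative example: sharpness_lvl = 5, lvl = 32 gives
+    // block_inside_limit = 32 >> 2 = 8, capped at 9 - 5 = 4, so
+    // lim = 4 and mblim = 2 * (32 + 2) + 4 = 72.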
+    int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
+
+    if (sharpness_lvl > 0) {
+      if (block_inside_limit > (9 - sharpness_lvl))
+        block_inside_limit = (9 - sharpness_lvl);
+    }
+
+    if (block_inside_limit < 1) block_inside_limit = 1;
+
+    memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
+    memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
+           SIMD_WIDTH);
+  }
+}
+
+static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
+                                const MB_MODE_INFO *mbmi) {
+#if CONFIG_SUPERTX
+  const int segment_id = VPXMIN(mbmi->segment_id, mbmi->segment_id_supertx);
+  assert(
+      IMPLIES(supertx_enabled(mbmi), mbmi->segment_id_supertx != MAX_SEGMENTS));
+  assert(IMPLIES(supertx_enabled(mbmi),
+                 mbmi->segment_id_supertx <= mbmi->segment_id));
+#else
+  const int segment_id = mbmi->segment_id;
+#endif  // CONFIG_SUPERTX
+  return lfi_n->lvl[segment_id][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
+}
+
+void vp10_loop_filter_init(VP10_COMMON *cm) {
+  loop_filter_info_n *lfi = &cm->lf_info;
+  struct loopfilter *lf = &cm->lf;
+  int lvl;
+
+  // init limits for given sharpness
+  update_sharpness(lfi, lf->sharpness_level);
+  lf->last_sharpness_level = lf->sharpness_level;
+
+  // init hev threshold const vectors
+  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
+    memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
+}
+
+void vp10_loop_filter_frame_init(VP10_COMMON *cm, int default_filt_lvl) {
+  int seg_id;
+  // scale is the multiplier for lf_deltas:
+  // 1 when filter_lvl is between 0 and 31,
+  // 2 when filter_lvl is between 32 and 63.
+  const int scale = 1 << (default_filt_lvl >> 5);
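+  // For example, default_filt_lvl = 40 gives scale = 1 << (40 >> 5) = 2, so
+  // each delta counts double at the higher filter levels.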
+  loop_filter_info_n *const lfi = &cm->lf_info;
+  struct loopfilter *const lf = &cm->lf;
+  const struct segmentation *const seg = &cm->seg;
+
+  // update limits if sharpness has changed
+  if (lf->last_sharpness_level != lf->sharpness_level) {
+    update_sharpness(lfi, lf->sharpness_level);
+    lf->last_sharpness_level = lf->sharpness_level;
+  }
+
+  for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
+    int lvl_seg = default_filt_lvl;
+    if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
+      const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
+      lvl_seg = clamp(
+          seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data, 0,
+          MAX_LOOP_FILTER);
+    }
+
+    if (!lf->mode_ref_delta_enabled) {
+      // We could get rid of this if we assumed that deltas are always set to
+      // zero when not in use; the encoder always uses deltas.
+      memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
+    } else {
+      int ref, mode;
+      const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
+      lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
+
+      for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
+        for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
+          const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
+                                lf->mode_deltas[mode] * scale;
+          lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+        }
+      }
+    }
+  }
+}
+
+static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s,
+                                         int pitch, unsigned int mask_16x16_l,
+                                         unsigned int mask_8x8_l,
+                                         unsigned int mask_4x4_l,
+                                         unsigned int mask_4x4_int_l,
+                                         const loop_filter_info_n *lfi_n,
+                                         const uint8_t *lfl) {
+  const int mask_shift = subsampling_factor ? 4 : 8;
+  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+  const int lfl_forward = subsampling_factor ? 4 : 8;
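+
+  // The masks pack two rows of 8x8 edges (8 bits per row for luma, 4 for
+  // subsampled chroma): the *_0 masks below cover the upper row and the *_1
+  // masks the lower row, so dual two-row filter variants can be used when
+  // both rows need the same filter.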
+
+  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
+  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
+  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
+  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
+  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
+  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
+  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
+  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+  unsigned int mask;
+
+  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
+              mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
+       mask; mask >>= 1) {
+    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
+    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
+
+    if (mask & 1) {
+      if ((mask_16x16_0 | mask_16x16_1) & 1) {
+        if ((mask_16x16_0 & mask_16x16_1) & 1) {
+          vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+                                   lfi0->hev_thr);
+        } else if (mask_16x16_0 & 1) {
+          vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+        } else {
+          vpx_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+                              lfi1->hev_thr);
+        }
+      }
+
+      if ((mask_8x8_0 | mask_8x8_1) & 1) {
+        if ((mask_8x8_0 & mask_8x8_1) & 1) {
+          vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+                                  lfi1->hev_thr);
+        } else if (mask_8x8_0 & 1) {
+          vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+        } else {
+          vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+                             lfi1->hev_thr);
+        }
+      }
+
+      if ((mask_4x4_0 | mask_4x4_1) & 1) {
+        if ((mask_4x4_0 & mask_4x4_1) & 1) {
+          vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+                                  lfi1->hev_thr);
+        } else if (mask_4x4_0 & 1) {
+          vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+        } else {
+          vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+                             lfi1->hev_thr);
+        }
+      }
+
+      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
+        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
+          vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+                                  lfi1->hev_thr);
+        } else if (mask_4x4_int_0 & 1) {
+          vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+                             lfi0->hev_thr);
+        } else {
+          vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
+                             lfi1->hev_thr);
+        }
+      }
+    }
+
+    s += 8;
+    lfl += 1;
+    mask_16x16_0 >>= 1;
+    mask_8x8_0 >>= 1;
+    mask_4x4_0 >>= 1;
+    mask_4x4_int_0 >>= 1;
+    mask_16x16_1 >>= 1;
+    mask_8x8_1 >>= 1;
+    mask_4x4_1 >>= 1;
+    mask_4x4_int_1 >>= 1;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_filter_selectively_vert_row2(
+    int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16_l,
+    unsigned int mask_8x8_l, unsigned int mask_4x4_l,
+    unsigned int mask_4x4_int_l, const loop_filter_info_n *lfi_n,
+    const uint8_t *lfl, int bd) {
+  const int mask_shift = subsampling_factor ? 4 : 8;
+  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+  const int lfl_forward = subsampling_factor ? 4 : 8;
+
+  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
+  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
+  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
+  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
+  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
+  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
+  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
+  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+  unsigned int mask;
+
+  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
+              mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
+       mask; mask >>= 1) {
+    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
+    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
+
+    if (mask & 1) {
+      if ((mask_16x16_0 | mask_16x16_1) & 1) {
+        if ((mask_16x16_0 & mask_16x16_1) & 1) {
+          vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+                                          lfi0->hev_thr, bd);
+        } else if (mask_16x16_0 & 1) {
+          vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
+                                     lfi0->hev_thr, bd);
+        } else {
+          vpx_highbd_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim,
+                                     lfi1->lim, lfi1->hev_thr, bd);
+        }
+      }
+
+      if ((mask_8x8_0 | mask_8x8_1) & 1) {
+        if ((mask_8x8_0 & mask_8x8_1) & 1) {
+          vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+                                         lfi1->hev_thr, bd);
+        } else if (mask_8x8_0 & 1) {
+          vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
+                                    lfi0->hev_thr, bd);
+        } else {
+          vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
+                                    lfi1->lim, lfi1->hev_thr, bd);
+        }
+      }
+
+      if ((mask_4x4_0 | mask_4x4_1) & 1) {
+        if ((mask_4x4_0 & mask_4x4_1) & 1) {
+          vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+                                         lfi1->hev_thr, bd);
+        } else if (mask_4x4_0 & 1) {
+          vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
+                                    lfi0->hev_thr, bd);
+        } else {
+          vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
+                                    lfi1->lim, lfi1->hev_thr, bd);
+        }
+      }
+
+      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
+        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
+          vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+                                         lfi1->hev_thr, bd);
+        } else if (mask_4x4_int_0 & 1) {
+          vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+                                    lfi0->hev_thr, bd);
+        } else {
+          vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
+                                    lfi1->lim, lfi1->hev_thr, bd);
+        }
+      }
+    }
+
+    s += 8;
+    lfl += 1;
+    mask_16x16_0 >>= 1;
+    mask_8x8_0 >>= 1;
+    mask_4x4_0 >>= 1;
+    mask_4x4_int_0 >>= 1;
+    mask_16x16_1 >>= 1;
+    mask_8x8_1 >>= 1;
+    mask_4x4_1 >>= 1;
+    mask_4x4_int_1 >>= 1;
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+static void filter_selectively_horiz(
+    uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
+    unsigned int mask_4x4, unsigned int mask_4x4_int,
+    const loop_filter_info_n *lfi_n, const uint8_t *lfl) {
+  unsigned int mask;
+  int count;
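+  // count is how many 8-pixel units each iteration consumes: 2 when a dual
+  // filter handles two adjacent blocks in one call, otherwise 1.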
+
+  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
+       mask >>= count) {
+    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+
+    count = 1;
+    if (mask & 1) {
+      if (mask_16x16 & 1) {
+        if ((mask_16x16 & 3) == 3) {
+          vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+                                     lfi->hev_thr);
+          count = 2;
+        } else {
+          vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+                                    lfi->hev_thr);
+        }
+      } else if (mask_8x8 & 1) {
+        if ((mask_8x8 & 3) == 3) {
+          // Next block's thresholds.
+          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+
+          vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+                                    lfi->hev_thr, lfin->mblim, lfin->lim,
+                                    lfin->hev_thr);
+
+          if ((mask_4x4_int & 3) == 3) {
+            vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+                                      lfi->lim, lfi->hev_thr, lfin->mblim,
+                                      lfin->lim, lfin->hev_thr);
+          } else {
+            if (mask_4x4_int & 1)
+              vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+                                   lfi->hev_thr);
+            else if (mask_4x4_int & 2)
+              vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+                                   lfin->lim, lfin->hev_thr);
+          }
+          count = 2;
+        } else {
+          vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+
+          if (mask_4x4_int & 1)
+            vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+                                 lfi->hev_thr);
+        }
+      } else if (mask_4x4 & 1) {
+        if ((mask_4x4 & 3) == 3) {
+          // Next block's thresholds.
+          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+
+          vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+                                    lfi->hev_thr, lfin->mblim, lfin->lim,
+                                    lfin->hev_thr);
+          if ((mask_4x4_int & 3) == 3) {
+            vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+                                      lfi->lim, lfi->hev_thr, lfin->mblim,
+                                      lfin->lim, lfin->hev_thr);
+          } else {
+            if (mask_4x4_int & 1)
+              vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+                                   lfi->hev_thr);
+            else if (mask_4x4_int & 2)
+              vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+                                   lfin->lim, lfin->hev_thr);
+          }
+          count = 2;
+        } else {
+          vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+
+          if (mask_4x4_int & 1)
+            vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+                                 lfi->hev_thr);
+        }
+      } else if (mask_4x4_int & 1) {
+        vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+                             lfi->hev_thr);
+      }
+    }
+    s += 8 * count;
+    lfl += count;
+    mask_16x16 >>= count;
+    mask_8x8 >>= count;
+    mask_4x4 >>= count;
+    mask_4x4_int >>= count;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_filter_selectively_horiz(
+    uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
+    unsigned int mask_4x4, unsigned int mask_4x4_int,
+    const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
+  unsigned int mask;
+  int count;
+
+  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
+       mask >>= count) {
+    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+
+    count = 1;
+    if (mask & 1) {
+      if (mask_16x16 & 1) {
+        if ((mask_16x16 & 3) == 3) {
+          vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+                                            lfi->hev_thr, bd);
+          count = 2;
+        } else {
+          vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+                                           lfi->hev_thr, bd);
+        }
+      } else if (mask_8x8 & 1) {
+        if ((mask_8x8 & 3) == 3) {
+          // Next block's thresholds.
+          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+
+          vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+                                           lfi->hev_thr, lfin->mblim, lfin->lim,
+                                           lfin->hev_thr, bd);
+
+          if ((mask_4x4_int & 3) == 3) {
+            vpx_highbd_lpf_horizontal_4_dual(
+                s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+                lfin->mblim, lfin->lim, lfin->hev_thr, bd);
+          } else {
+            if (mask_4x4_int & 1) {
+              vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+                                          lfi->lim, lfi->hev_thr, bd);
+            } else if (mask_4x4_int & 2) {
+              vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+                                          lfin->lim, lfin->hev_thr, bd);
+            }
+          }
+          count = 2;
+        } else {
+          vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
+                                      lfi->hev_thr, bd);
+
+          if (mask_4x4_int & 1) {
+            vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+                                        lfi->lim, lfi->hev_thr, bd);
+          }
+        }
+      } else if (mask_4x4 & 1) {
+        if ((mask_4x4 & 3) == 3) {
+          // Next block's thresholds.
+          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+
+          vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+                                           lfi->hev_thr, lfin->mblim, lfin->lim,
+                                           lfin->hev_thr, bd);
+          if ((mask_4x4_int & 3) == 3) {
+            vpx_highbd_lpf_horizontal_4_dual(
+                s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+                lfin->mblim, lfin->lim, lfin->hev_thr, bd);
+          } else {
+            if (mask_4x4_int & 1) {
+              vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+                                          lfi->lim, lfi->hev_thr, bd);
+            } else if (mask_4x4_int & 2) {
+              vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+                                          lfin->lim, lfin->hev_thr, bd);
+            }
+          }
+          count = 2;
+        } else {
+          vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
+                                      lfi->hev_thr, bd);
+
+          if (mask_4x4_int & 1) {
+            vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+                                        lfi->lim, lfi->hev_thr, bd);
+          }
+        }
+      } else if (mask_4x4_int & 1) {
+        vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+                                    lfi->hev_thr, bd);
+      }
+    }
+    s += 8 * count;
+    lfl += count;
+    mask_16x16 >>= count;
+    mask_8x8 >>= count;
+    mask_4x4 >>= count;
+    mask_4x4_int >>= count;
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+// This function ORs into the current lfm structure where to apply loop
+// filters for the specific mi we are looking at. It uses information
+// including the block size type (32x16, 32x32, etc.), the transform size,
+// whether there were any coefficients encoded, and the loop filter strength
+// of the block we are currently looking at. 'shift' is used to position the
+// 1's we produce.
+// TODO(JBB): Need another function for different color subsampling.
+static void build_masks(const loop_filter_info_n *const lfi_n,
+                        const MODE_INFO *mi, const int shift_y,
+                        const int shift_uv, LOOP_FILTER_MASK *lfm) {
+  const MB_MODE_INFO *mbmi = &mi->mbmi;
+  const BLOCK_SIZE block_size = mbmi->sb_type;
+  // TODO(debargha): Check if masks can be set up correctly when
+  // rectangular transforms are used with the EXT_TX experiment.
+  const TX_SIZE tx_size_y = txsize_sqr_up_map[mbmi->tx_size];
+  const TX_SIZE tx_size_uv =
+      get_uv_tx_size_impl(mbmi->tx_size, block_size, 1, 1);
+  const int filter_level = get_filter_level(lfi_n, mbmi);
+  uint64_t *const left_y = &lfm->left_y[tx_size_y];
+  uint64_t *const above_y = &lfm->above_y[tx_size_y];
+  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
+  uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
+  uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
+  uint16_t *const int_4x4_uv = &lfm->left_int_4x4_uv;
+  int i;
+
+  // If filter level is 0 we don't loop filter.
+  if (!filter_level) {
+    return;
+  } else {
+    const int w = num_8x8_blocks_wide_lookup[block_size];
+    const int h = num_8x8_blocks_high_lookup[block_size];
+    const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
+    const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
+
+    for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
+  }
+
+  // These set a 1 in the current block size for the block size edges.
+  // For instance, if the block size is 32x16, we'll set:
+  //    above =   1111
+  //              0000
+  //    and
+  //    left  =   1000
+  //              1000
+  // NOTE: In this example the low bit is leftmost, so ( 1000 ) is stored
+  //       as 1, not 8.
+  //
+  // U and V set things on a 16 bit scale.
+  //
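+  // As a concrete illustration (the numbers follow from the 8-bits-per-row
+  // layout of the 64-bit y masks): for a 32x16 block at shift_y == 0, above
+  // contributes 0x000f (bits 0..3) and left contributes 0x0101 (bits 0 and
+  // 8).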
+  *above_y |= above_prediction_mask[block_size] << shift_y;
+  *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
+  *left_y |= left_prediction_mask[block_size] << shift_y;
+  *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
+
+  // If the block has no coefficients and is not intra we skip applying
+  // the loop filter on block edges.
+  if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return;
+
+  // Here we are adding a mask for the transform size. The transform
+  // size mask is set to be correct for a 64x64 prediction block size. We
+  // mask to match the size of the block we are working on and then shift it
+  // into place.
+  *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
+              << shift_y;
+  *above_uv |=
+      (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv])
+      << shift_uv;
+
+  *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
+             << shift_y;
+  *left_uv |= (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv])
+              << shift_uv;
+
+  // Here we are trying to determine what to do with the internal 4x4 block
+  // boundaries.  These differ from the 4x4 boundaries on the outside edge of
+  // an 8x8 in that the internal ones can be skipped and don't depend on
+  // the prediction block size.
+  if (tx_size_y == TX_4X4)
+    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
+
+  if (tx_size_uv == TX_4X4)
+    *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
+}
+
+// This function does the same thing as the one above with the exception that
+// it only affects the y masks. It exists because for blocks < 16x16 in size,
+// we only update u and v masks on the first block.
+static void build_y_mask(const loop_filter_info_n *const lfi_n,
+                         const MODE_INFO *mi, const int shift_y,
+#if CONFIG_SUPERTX
+                         int supertx_enabled,
+#endif  // CONFIG_SUPERTX
+                         LOOP_FILTER_MASK *lfm) {
+  const MB_MODE_INFO *mbmi = &mi->mbmi;
+  const TX_SIZE tx_size_y = txsize_sqr_up_map[mbmi->tx_size];
+#if CONFIG_SUPERTX
+  const BLOCK_SIZE block_size =
+      supertx_enabled ? (BLOCK_SIZE)(3 * tx_size_y) : mbmi->sb_type;
+#else
+  const BLOCK_SIZE block_size = mbmi->sb_type;
+#endif
+  const int filter_level = get_filter_level(lfi_n, mbmi);
+  uint64_t *const left_y = &lfm->left_y[tx_size_y];
+  uint64_t *const above_y = &lfm->above_y[tx_size_y];
+  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
+  int i;
+
+  if (!filter_level) {
+    return;
+  } else {
+    const int w = num_8x8_blocks_wide_lookup[block_size];
+    const int h = num_8x8_blocks_high_lookup[block_size];
+    const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
+    const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
+
+    for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
+  }
+
+  *above_y |= above_prediction_mask[block_size] << shift_y;
+  *left_y |= left_prediction_mask[block_size] << shift_y;
+
+  if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return;
+
+  *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
+              << shift_y;
+
+  *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
+             << shift_y;
+
+  if (tx_size_y == TX_4X4)
+    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
+}
+
+// This function sets up the bit masks for the entire 64x64 region represented
+// by mi_row, mi_col.
+// TODO(JBB): This function only works for yv12.
+void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col,
+                     MODE_INFO **mi, const int mode_info_stride,
+                     LOOP_FILTER_MASK *lfm) {
+  int idx_32, idx_16, idx_8;
+  const loop_filter_info_n *const lfi_n = &cm->lf_info;
+  MODE_INFO **mip = mi;
+  MODE_INFO **mip2 = mi;
+
+  // These are offsets to the next mi in the 64x64 block. They are what gets
+  // added to the mi ptr as we go through each loop. They help us to avoid
+  // setting up special row and column counters for each index. The last step
+  // brings us back to the starting position.
+  const int offset_32[] = { 4, (mode_info_stride << 2) - 4, 4,
+                            -(mode_info_stride << 2) - 4 };
+  const int offset_16[] = { 2, (mode_info_stride << 1) - 2, 2,
+                            -(mode_info_stride << 1) - 2 };
+  const int offset[] = { 1, mode_info_stride - 1, 1, -mode_info_stride - 1 };
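+  // For example, the offset_32[] steps visit the four 32x32 quadrants in
+  // raster order: start at the top-left, +4 columns to the top-right, then
+  // down 4 rows and back 4 columns to the bottom-left, +4 columns to the
+  // bottom-right, and the final step returns to the starting position.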
+
+  // Following variables represent shifts to position the current block
+  // mask over the appropriate block. A shift of 36 to the left will move
+  // the bits for the final 32 by 32 block in the 64x64 down 4 rows and
+  // right 4 columns to the appropriate spot.
+  const int shift_32_y[] = { 0, 4, 32, 36 };
+  const int shift_16_y[] = { 0, 2, 16, 18 };
+  const int shift_8_y[] = { 0, 1, 8, 9 };
+  const int shift_32_uv[] = { 0, 2, 8, 10 };
+  const int shift_16_uv[] = { 0, 1, 4, 5 };
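+  // The y shifts use 8 bits per row of 8x8 blocks, so 36 == 4 rows * 8 + 4
+  // columns; the uv shifts use 4 bits per row, so 10 == 2 rows * 4 + 2
+  // columns.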
+  int i;
+  const int max_rows = VPXMIN(cm->mi_rows - mi_row, MAX_MIB_SIZE);
+  const int max_cols = VPXMIN(cm->mi_cols - mi_col, MAX_MIB_SIZE);
+#if CONFIG_EXT_PARTITION
+  assert(0 && "Not yet updated");
+#endif  // CONFIG_EXT_PARTITION
+
+  vp10_zero(*lfm);
+  assert(mip[0] != NULL);
+
+  // TODO(jimbankoski): Try moving most of the following code into decode
+  // loop and storing lfm in the mbmi structure so that we don't have to go
+  // through the recursive loop structure multiple times.
+  switch (mip[0]->mbmi.sb_type) {
+    case BLOCK_64X64: build_masks(lfi_n, mip[0], 0, 0, lfm); break;
+    case BLOCK_64X32:
+      build_masks(lfi_n, mip[0], 0, 0, lfm);
+      mip2 = mip + mode_info_stride * 4;
+      if (4 >= max_rows) break;
+      build_masks(lfi_n, mip2[0], 32, 8, lfm);
+      break;
+    case BLOCK_32X64:
+      build_masks(lfi_n, mip[0], 0, 0, lfm);
+      mip2 = mip + 4;
+      if (4 >= max_cols) break;
+      build_masks(lfi_n, mip2[0], 4, 2, lfm);
+      break;
+    default:
+      for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
+        const int shift_y = shift_32_y[idx_32];
+        const int shift_uv = shift_32_uv[idx_32];
+        const int mi_32_col_offset = ((idx_32 & 1) << 2);
+        const int mi_32_row_offset = ((idx_32 >> 1) << 2);
+        if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
+          continue;
+        switch (mip[0]->mbmi.sb_type) {
+          case BLOCK_32X32:
+            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+            break;
+          case BLOCK_32X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+#if CONFIG_SUPERTX
+            if (supertx_enabled(&mip[0]->mbmi)) break;
+#endif
+            if (mi_32_row_offset + 2 >= max_rows) continue;
+            mip2 = mip + mode_info_stride * 2;
+            build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
+            break;
+          case BLOCK_16X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+#if CONFIG_SUPERTX
+            if (supertx_enabled(&mip[0]->mbmi)) break;
+#endif
+            if (mi_32_col_offset + 2 >= max_cols) continue;
+            mip2 = mip + 2;
+            build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
+            break;
+          default:
+#if CONFIG_SUPERTX
+            if (mip[0]->mbmi.tx_size == TX_32X32) {
+              build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+              break;
+            }
+#endif
+            for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
+              const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
+              const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
+              const int mi_16_col_offset =
+                  mi_32_col_offset + ((idx_16 & 1) << 1);
+              const int mi_16_row_offset =
+                  mi_32_row_offset + ((idx_16 >> 1) << 1);
+
+              if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
+                continue;
+
+              switch (mip[0]->mbmi.sb_type) {
+                case BLOCK_16X16:
+                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  break;
+                case BLOCK_16X8:
+#if CONFIG_SUPERTX
+                  if (supertx_enabled(&mip[0]->mbmi)) break;
+#endif
+                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  if (mi_16_row_offset + 1 >= max_rows) continue;
+                  mip2 = mip + mode_info_stride;
+                  build_y_mask(lfi_n, mip2[0], shift_y + 8,
+#if CONFIG_SUPERTX
+                               0,
+#endif
+                               lfm);
+                  break;
+                case BLOCK_8X16:
+#if CONFIG_SUPERTX
+                  if (supertx_enabled(&mip[0]->mbmi)) break;
+#endif
+                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  if (mi_16_col_offset + 1 >= max_cols) continue;
+                  mip2 = mip + 1;
+                  build_y_mask(lfi_n, mip2[0], shift_y + 1,
+#if CONFIG_SUPERTX
+                               0,
+#endif
+                               lfm);
+                  break;
+                default: {
+                  const int shift_y =
+                      shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0];
+#if CONFIG_SUPERTX
+                  if (mip[0]->mbmi.tx_size == TX_16X16) {
+                    build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                    break;
+                  }
+#endif
+                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  mip += offset[0];
+                  for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
+                    const int shift_y = shift_32_y[idx_32] +
+                                        shift_16_y[idx_16] + shift_8_y[idx_8];
+                    const int mi_8_col_offset =
+                        mi_16_col_offset + ((idx_8 & 1));
+                    const int mi_8_row_offset =
+                        mi_16_row_offset + ((idx_8 >> 1));
+
+                    if (mi_8_col_offset >= max_cols ||
+                        mi_8_row_offset >= max_rows)
+                      continue;
+                    build_y_mask(lfi_n, mip[0], shift_y,
+#if CONFIG_SUPERTX
+                                 supertx_enabled(&mip[0]->mbmi),
+#endif
+                                 lfm);
+                  }
+                  break;
+                }
+              }
+            }
+            break;
+        }
+      }
+      break;
+  }
+  // The largest loopfilter we have is 16x16 so we use the 16x16 mask
+  // for 32x32 transforms also.
+  lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
+  lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
+  lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
+  lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];
+
+  // We do at least 8 tap filter on every 32x32 even if the transform size
+  // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
+  // remove it from the 4x4.
+  lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
+  lfm->left_y[TX_4X4] &= ~left_border;
+  lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
+  lfm->above_y[TX_4X4] &= ~above_border;
+  lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
+  lfm->left_uv[TX_4X4] &= ~left_border_uv;
+  lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
+  lfm->above_uv[TX_4X4] &= ~above_border_uv;
+
+  // We do some special edge handling.
+  if (mi_row + MAX_MIB_SIZE > cm->mi_rows) {
+    const uint64_t rows = cm->mi_rows - mi_row;
+
+    // Each 8x8 block inside the border gets a 1.
+    const uint64_t mask_y = (((uint64_t)1 << (rows << MAX_MIB_SIZE_LOG2)) - 1);
+    const uint16_t mask_uv =
+        (((uint16_t)1 << (((rows + 1) >> 1) << (MAX_MIB_SIZE_LOG2 - 1))) - 1);
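+    // e.g. with rows == 5, mask_y keeps the low 40 bits (5 rows * 8 bits)
+    // and mask_uv keeps the low 12 bits (3 uv rows * 4 bits).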
+
+    // Remove values completely outside our border.
+    for (i = 0; i < TX_32X32; i++) {
+      lfm->left_y[i] &= mask_y;
+      lfm->above_y[i] &= mask_y;
+      lfm->left_uv[i] &= mask_uv;
+      lfm->above_uv[i] &= mask_uv;
+    }
+    lfm->int_4x4_y &= mask_y;
+    lfm->above_int_4x4_uv = lfm->left_int_4x4_uv & mask_uv;
+
+    // We don't apply a wide loop filter on the last uv block row. If set,
+    // apply the shorter one instead.
+    if (rows == 1) {
+      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
+      lfm->above_uv[TX_16X16] = 0;
+    }
+    if (rows == 5) {
+      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
+      lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
+    }
+  }
+
+  if (mi_col + MAX_MIB_SIZE > cm->mi_cols) {
+    const uint64_t columns = cm->mi_cols - mi_col;
+
+    // Each 8x8 block inside the border gets a 1; the multiply copies the
+    // border to where we need it.
+    const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL;
+    const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;
+
+    // Internal edges are not applied on the last column of the image, so
+    // we mask 1 more for the internal edges.
+    const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;
+
+    // Remove the bits outside the image edge.
+    for (i = 0; i < TX_32X32; i++) {
+      lfm->left_y[i] &= mask_y;
+      lfm->above_y[i] &= mask_y;
+      lfm->left_uv[i] &= mask_uv;
+      lfm->above_uv[i] &= mask_uv;
+    }
+    lfm->int_4x4_y &= mask_y;
+    lfm->left_int_4x4_uv &= mask_uv_int;
+
+    // We don't apply a wide loop filter on the last uv column. If set,
+    // apply the shorter one instead.
+    if (columns == 1) {
+      lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
+      lfm->left_uv[TX_16X16] = 0;
+    }
+    if (columns == 5) {
+      lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
+      lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
+    }
+  }
+  // We don't apply a loop filter on the first column in the image, so mask
+  // that out.
+  if (mi_col == 0) {
+    for (i = 0; i < TX_32X32; i++) {
+      lfm->left_y[i] &= 0xfefefefefefefefeULL;
+      lfm->left_uv[i] &= 0xeeee;
+    }
+  }
+
+  // Assert if we try to apply 2 different loop filters at the same position.
+  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
+  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
+  assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
+  assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
+  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8]));
+  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
+  assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
+  assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16]));
+  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
+  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
+  assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
+  assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
+  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
+  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
+  assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
+  assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16]));
+}
+
+static void filter_selectively_vert(
+    uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
+    unsigned int mask_4x4, unsigned int mask_4x4_int,
+    const loop_filter_info_n *lfi_n, const uint8_t *lfl) {
+  unsigned int mask;
+
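+  // Unlike the horizontal pass there is no dual-column fast path here; the
+  // loop always advances one 8x8 column (one mask bit) per iteration.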
+  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
+       mask >>= 1) {
+    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+
+    if (mask & 1) {
+      if (mask_16x16 & 1) {
+        vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+      } else if (mask_8x8 & 1) {
+        vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+      } else if (mask_4x4 & 1) {
+        vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+      }
+    }
+    if (mask_4x4_int & 1)
+      vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+    s += 8;
+    lfl += 1;
+    mask_16x16 >>= 1;
+    mask_8x8 >>= 1;
+    mask_4x4 >>= 1;
+    mask_4x4_int >>= 1;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_filter_selectively_vert(
+    uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
+    unsigned int mask_4x4, unsigned int mask_4x4_int,
+    const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
+  unsigned int mask;
+
+  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
+       mask >>= 1) {
+    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+
+    if (mask & 1) {
+      if (mask_16x16 & 1) {
+        vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+                                   bd);
+      } else if (mask_8x8 & 1) {
+        vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+                                  bd);
+      } else if (mask_4x4 & 1) {
+        vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+                                  bd);
+      }
+    }
+    if (mask_4x4_int & 1)
+      vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
+                                lfi->hev_thr, bd);
+    s += 8;
+    lfl += 1;
+    mask_16x16 >>= 1;
+    mask_8x8 >>= 1;
+    mask_4x4 >>= 1;
+    mask_4x4_int >>= 1;
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_filter_block_plane_non420(VP10_COMMON *cm,
+                                    struct macroblockd_plane *plane,
+                                    MODE_INFO **mib, int mi_row, int mi_col) {
+  const int ss_x = plane->subsampling_x;
+  const int ss_y = plane->subsampling_y;
+  const int row_step = 1 << ss_y;
+  const int col_step = 1 << ss_x;
+  struct buf_2d *const dst = &plane->dst;
+  uint8_t *const dst0 = dst->buf;
+  unsigned int mask_16x16[MAX_MIB_SIZE] = { 0 };
+  unsigned int mask_8x8[MAX_MIB_SIZE] = { 0 };
+  unsigned int mask_4x4[MAX_MIB_SIZE] = { 0 };
+  unsigned int mask_4x4_int[MAX_MIB_SIZE] = { 0 };
+  uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE];
+  int r, c;
+
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) {
+    unsigned int mask_16x16_c = 0;
+    unsigned int mask_8x8_c = 0;
+    unsigned int mask_4x4_c = 0;
+    unsigned int border_mask;
+
+    // Determine the vertical edges that need filtering
+    for (c = 0; c < cm->mib_size && mi_col + c < cm->mi_cols; c += col_step) {
+      const MODE_INFO *mi = mib[c];
+      const MB_MODE_INFO *mbmi = &mi[0].mbmi;
+      const BLOCK_SIZE sb_type = mbmi->sb_type;
+      const int skip_this = mbmi->skip && is_inter_block(mbmi);
+      const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
+      const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
+
+      // left edge of current unit is block/partition edge -> no skip
+      const int block_edge_left =
+          (num_4x4_blocks_wide_lookup[sb_type] > 1) ? !blk_col : 1;
+      const int skip_this_c = skip_this && !block_edge_left;
+      // top edge of current unit is block/partition edge -> no skip
+      const int block_edge_above =
+          (num_4x4_blocks_high_lookup[sb_type] > 1) ? !blk_row : 1;
+      const int skip_this_r = skip_this && !block_edge_above;
+
+#if CONFIG_VAR_TX
+      TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
+                            ? get_uv_tx_size(mbmi, plane)
+                            : mbmi->tx_size;
+#else
+      const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
+                                  ? get_uv_tx_size(mbmi, plane)
+                                  : mbmi->tx_size;
+#endif
+
+      const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
+      const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
+
+      TX_SIZE tx_size_c = num_4x4_blocks_wide_txsize_log2_lookup[tx_size];
+      TX_SIZE tx_size_r = num_4x4_blocks_high_txsize_log2_lookup[tx_size];
+
+      int tx_size_mask = 0;
+      // Filter level can vary per MI
+      if (!(lfl[r][c >> ss_x] = get_filter_level(&cm->lf_info, mbmi))) continue;
+
+      if (tx_size == TX_32X32)
+        tx_size_mask = 3;
+      else if (tx_size == TX_16X16)
+        tx_size_mask = 1;
+      else
+        tx_size_mask = 0;
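+      // i.e. only every 4th 8x8 position lies on a 32x32 transform edge and
+      // only every 2nd on a 16x16 edge; positions in between are skipped by
+      // the ((x & tx_size_mask) == 0) checks below.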
+
+#if CONFIG_VAR_TX
+      if (is_inter_block(mbmi) && !mbmi->skip)
+        tx_size =
+            (plane->plane_type == PLANE_TYPE_UV)
+                ? get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row][blk_col],
+                                      sb_type, ss_x, ss_y)
+                : mbmi->inter_tx_size[blk_row][blk_col];
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+      tx_size_r =
+          VPXMIN(txsize_horz_map[tx_size], cm->above_txfm_context[mi_col + c]);
+      tx_size_c = VPXMIN(txsize_vert_map[tx_size],
+                         cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK]);
+
+      cm->above_txfm_context[mi_col + c] = txsize_horz_map[tx_size];
+      cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK] =
+          txsize_vert_map[tx_size];
+#else
+      tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]);
+      tx_size_c =
+          VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK]);
+
+      cm->above_txfm_context[mi_col + c] = tx_size;
+      cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK] = tx_size;
+#endif
+#endif
+
+      // Build masks based on the transform size of each block
+      // handle vertical mask
+      if (tx_size_c == TX_32X32) {
+        if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
+          if (!skip_border_4x4_c)
+            mask_16x16_c |= 1 << (c >> ss_x);
+          else
+            mask_8x8_c |= 1 << (c >> ss_x);
+        }
+      } else if (tx_size_c == TX_16X16) {
+        if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
+          if (!skip_border_4x4_c)
+            mask_16x16_c |= 1 << (c >> ss_x);
+          else
+            mask_8x8_c |= 1 << (c >> ss_x);
+        }
+      } else {
+        // force 8x8 filtering on 32x32 boundaries
+        if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
+          if (tx_size_c == TX_8X8 || ((c >> ss_x) & 3) == 0)
+            mask_8x8_c |= 1 << (c >> ss_x);
+          else
+            mask_4x4_c |= 1 << (c >> ss_x);
+        }
+
+        if (!skip_this && tx_size_c < TX_8X8 && !skip_border_4x4_c &&
+            ((c >> ss_x) & tx_size_mask) == 0)
+          mask_4x4_int[r] |= 1 << (c >> ss_x);
+      }
+
+      // set horizontal mask
+      if (tx_size_r == TX_32X32) {
+        if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
+          if (!skip_border_4x4_r)
+            mask_16x16[r] |= 1 << (c >> ss_x);
+          else
+            mask_8x8[r] |= 1 << (c >> ss_x);
+        }
+      } else if (tx_size_r == TX_16X16) {
+        if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
+          if (!skip_border_4x4_r)
+            mask_16x16[r] |= 1 << (c >> ss_x);
+          else
+            mask_8x8[r] |= 1 << (c >> ss_x);
+        }
+      } else {
+        // force 8x8 filtering on 32x32 boundaries
+        if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
+          if (tx_size_r == TX_8X8 || ((r >> ss_y) & 3) == 0)
+            mask_8x8[r] |= 1 << (c >> ss_x);
+          else
+            mask_4x4[r] |= 1 << (c >> ss_x);
+        }
+
+        if (!skip_this && tx_size_r < TX_8X8 && !skip_border_4x4_c &&
+            ((r >> ss_y) & tx_size_mask) == 0)
+          mask_4x4_int[r] |= 1 << (c >> ss_x);
+      }
+    }
+
+    // Disable filtering on the leftmost column
+    border_mask = ~(mi_col == 0);
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (cm->use_highbitdepth) {
+      highbd_filter_selectively_vert(
+          CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
+          mask_16x16_c & border_mask, mask_8x8_c & border_mask,
+          mask_4x4_c & border_mask, mask_4x4_int[r], &cm->lf_info, &lfl[r][0],
+          (int)cm->bit_depth);
+    } else {
+      filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask,
+                              mask_8x8_c & border_mask,
+                              mask_4x4_c & border_mask, mask_4x4_int[r],
+                              &cm->lf_info, &lfl[r][0]);
+    }
+#else
+    filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask,
+                            mask_8x8_c & border_mask, mask_4x4_c & border_mask,
+                            mask_4x4_int[r], &cm->lf_info, &lfl[r][0]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    dst->buf += MI_SIZE * dst->stride;
+    mib += row_step * cm->mi_stride;
+  }
+
+  // Now do horizontal pass
+  dst->buf = dst0;
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) {
+    const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
+    const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
+
+    unsigned int mask_16x16_r;
+    unsigned int mask_8x8_r;
+    unsigned int mask_4x4_r;
+
+    if (mi_row + r == 0) {
+      mask_16x16_r = 0;
+      mask_8x8_r = 0;
+      mask_4x4_r = 0;
+    } else {
+      mask_16x16_r = mask_16x16[r];
+      mask_8x8_r = mask_8x8[r];
+      mask_4x4_r = mask_4x4[r];
+    }
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (cm->use_highbitdepth) {
+      highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
+                                      dst->stride, mask_16x16_r, mask_8x8_r,
+                                      mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+                                      &lfl[r][0], (int)cm->bit_depth);
+    } else {
+      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+                               mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+                               &lfl[r][0]);
+    }
+#else
+    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+                             mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+                             &lfl[r][0]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    dst->buf += MI_SIZE * dst->stride;
+  }
+}
+
+void vp10_filter_block_plane_ss00(VP10_COMMON *const cm,
+                                  struct macroblockd_plane *const plane,
+                                  int mi_row, LOOP_FILTER_MASK *lfm) {
+  struct buf_2d *const dst = &plane->dst;
+  uint8_t *const dst0 = dst->buf;
+  int r;
+  uint64_t mask_16x16 = lfm->left_y[TX_16X16];
+  uint64_t mask_8x8 = lfm->left_y[TX_8X8];
+  uint64_t mask_4x4 = lfm->left_y[TX_4X4];
+  uint64_t mask_4x4_int = lfm->int_4x4_y;
+
+  assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
+
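+  // Each iteration of the vertical pass consumes two MI rows, i.e. 16 mask
+  // bits (8 bits per row), hence the 0xffff row masks and the
+  // 'mask >>= 2 * MI_SIZE' updates below.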
+  // Vertical pass: do 2 rows at one time
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
+    unsigned int mask_16x16_l = mask_16x16 & 0xffff;
+    unsigned int mask_8x8_l = mask_8x8 & 0xffff;
+    unsigned int mask_4x4_l = mask_4x4 & 0xffff;
+    unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;
+
+// Disable filtering on the leftmost column.
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (cm->use_highbitdepth) {
+      highbd_filter_selectively_vert_row2(
+          plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
+          mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
+          &lfm->lfl_y[r][0], (int)cm->bit_depth);
+    } else {
+      filter_selectively_vert_row2(
+          plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
+          mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r][0]);
+    }
+#else
+    filter_selectively_vert_row2(
+        plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
+        mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r][0]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    dst->buf += 2 * MI_SIZE * dst->stride;
+    mask_16x16 >>= 2 * MI_SIZE;
+    mask_8x8 >>= 2 * MI_SIZE;
+    mask_4x4 >>= 2 * MI_SIZE;
+    mask_4x4_int >>= 2 * MI_SIZE;
+  }
+
+  // Horizontal pass
+  dst->buf = dst0;
+  mask_16x16 = lfm->above_y[TX_16X16];
+  mask_8x8 = lfm->above_y[TX_8X8];
+  mask_4x4 = lfm->above_y[TX_4X4];
+  mask_4x4_int = lfm->int_4x4_y;
+
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r++) {
+    unsigned int mask_16x16_r;
+    unsigned int mask_8x8_r;
+    unsigned int mask_4x4_r;
+
+    if (mi_row + r == 0) {
+      mask_16x16_r = 0;
+      mask_8x8_r = 0;
+      mask_4x4_r = 0;
+    } else {
+      mask_16x16_r = mask_16x16 & 0xff;
+      mask_8x8_r = mask_8x8 & 0xff;
+      mask_4x4_r = mask_4x4 & 0xff;
+    }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (cm->use_highbitdepth) {
+      highbd_filter_selectively_horiz(
+          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
+          mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r][0],
+          (int)cm->bit_depth);
+    } else {
+      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+                               mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
+                               &lfm->lfl_y[r][0]);
+    }
+#else
+    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+                             mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
+                             &lfm->lfl_y[r][0]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+    dst->buf += MI_SIZE * dst->stride;
+    mask_16x16 >>= MI_SIZE;
+    mask_8x8 >>= MI_SIZE;
+    mask_4x4 >>= MI_SIZE;
+    mask_4x4_int >>= MI_SIZE;
+  }
+}
+
+void vp10_filter_block_plane_ss11(VP10_COMMON *const cm,
+                                  struct macroblockd_plane *const plane,
+                                  int mi_row, LOOP_FILTER_MASK *lfm) {
+  struct buf_2d *const dst = &plane->dst;
+  uint8_t *const dst0 = dst->buf;
+  int r, c;
+
+  uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
+  uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
+  uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
+  uint16_t mask_4x4_int = lfm->left_int_4x4_uv;
+
+  assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
+  assert(plane->plane_type == PLANE_TYPE_UV);
+
+  // Vertical pass: do 2 rows at one time
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 4) {
+    for (c = 0; c < (cm->mib_size >> 1); c++) {
+      lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1];
+      lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1];
+    }
+
+    {
+      unsigned int mask_16x16_l = mask_16x16 & 0xff;
+      unsigned int mask_8x8_l = mask_8x8 & 0xff;
+      unsigned int mask_4x4_l = mask_4x4 & 0xff;
+      unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;
+
+// Disable filtering on the leftmost column.
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (cm->use_highbitdepth) {
+        highbd_filter_selectively_vert_row2(
+            plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
+            mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
+            &lfm->lfl_uv[r >> 1][0], (int)cm->bit_depth);
+      } else {
+        filter_selectively_vert_row2(plane->subsampling_x, dst->buf,
+                                     dst->stride, mask_16x16_l, mask_8x8_l,
+                                     mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
+                                     &lfm->lfl_uv[r >> 1][0]);
+      }
+#else
+      filter_selectively_vert_row2(
+          plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
+          mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_uv[r >> 1][0]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+      dst->buf += 2 * MI_SIZE * dst->stride;
+      mask_16x16 >>= MI_SIZE;
+      mask_8x8 >>= MI_SIZE;
+      mask_4x4 >>= MI_SIZE;
+      mask_4x4_int >>= MI_SIZE;
+    }
+  }
+
+  // Horizontal pass
+  dst->buf = dst0;
+  mask_16x16 = lfm->above_uv[TX_16X16];
+  mask_8x8 = lfm->above_uv[TX_8X8];
+  mask_4x4 = lfm->above_uv[TX_4X4];
+  mask_4x4_int = lfm->above_int_4x4_uv;
+
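+  // uv masks carry 4 bits per row, hence the 0xf row masks and the
+  // 'mask >>= MI_SIZE / 2' updates below.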
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
+    const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
+    const unsigned int mask_4x4_int_r =
+        skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
+    unsigned int mask_16x16_r;
+    unsigned int mask_8x8_r;
+    unsigned int mask_4x4_r;
+
+    if (mi_row + r == 0) {
+      mask_16x16_r = 0;
+      mask_8x8_r = 0;
+      mask_4x4_r = 0;
+    } else {
+      mask_16x16_r = mask_16x16 & 0xf;
+      mask_8x8_r = mask_8x8 & 0xf;
+      mask_4x4_r = mask_4x4 & 0xf;
+    }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (cm->use_highbitdepth) {
+      highbd_filter_selectively_horiz(
+          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
+          mask_4x4_r, mask_4x4_int_r, &cm->lf_info, &lfm->lfl_uv[r >> 1][0],
+          (int)cm->bit_depth);
+    } else {
+      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+                               mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+                               &lfm->lfl_uv[r >> 1][0]);
+    }
+#else
+    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+                             mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+                             &lfm->lfl_uv[r >> 1][0]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+    dst->buf += MI_SIZE * dst->stride;
+    mask_16x16 >>= MI_SIZE / 2;
+    mask_8x8 >>= MI_SIZE / 2;
+    mask_4x4 >>= MI_SIZE / 2;
+    mask_4x4_int >>= MI_SIZE / 2;
+  }
+}
+
+void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP10_COMMON *cm,
+                           struct macroblockd_plane planes[MAX_MB_PLANE],
+                           int start, int stop, int y_only) {
+#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
+  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
+  int mi_row, mi_col;
+
+#if CONFIG_VAR_TX
+  memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
+#endif  // CONFIG_VAR_TX
+  for (mi_row = start; mi_row < stop; mi_row += cm->mib_size) {
+    MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
+#if CONFIG_VAR_TX
+    memset(cm->left_txfm_context, TX_SIZES, MAX_MIB_SIZE);
+#endif  // CONFIG_VAR_TX
+    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
+      int plane;
+
+      vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
+
+      for (plane = 0; plane < num_planes; ++plane)
+        vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row,
+                                       mi_col);
+    }
+  }
+#else
+  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
+  int mi_row, mi_col;
+  enum lf_path path;
+  LOOP_FILTER_MASK lfm;
+
+  if (y_only)
+    path = LF_PATH_444;
+  else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
+    path = LF_PATH_420;
+  else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
+    path = LF_PATH_444;
+  else
+    path = LF_PATH_SLOW;
+
+  for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
+    MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
+    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
+      int plane;
+
+      vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
+
+      // TODO(JBB): Make setup_mask work for non-420.
+      vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
+
+      vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
+      for (plane = 1; plane < num_planes; ++plane) {
+        switch (path) {
+          case LF_PATH_420:
+            vp10_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
+            break;
+          case LF_PATH_444:
+            vp10_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
+            break;
+          case LF_PATH_SLOW:
+            vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
+                                           mi_row, mi_col);
+            break;
+        }
+      }
+    }
+  }
+#endif  // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
+}
+
+void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                            MACROBLOCKD *xd, int frame_filter_level, int y_only,
+                            int partial_frame) {
+  int start_mi_row, end_mi_row, mi_rows_to_filter;
+  if (!frame_filter_level) return;
+  start_mi_row = 0;
+  mi_rows_to_filter = cm->mi_rows;
+  if (partial_frame && cm->mi_rows > 8) {
+    start_mi_row = cm->mi_rows >> 1;
+    start_mi_row &= 0xfffffff8;
+    mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
+  }
+  end_mi_row = start_mi_row + mi_rows_to_filter;
+  vp10_loop_filter_frame_init(cm, frame_filter_level);
+  vp10_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only);
+}
+
+void vp10_loop_filter_data_reset(
+    LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
+    struct VP10Common *cm,
+    const struct macroblockd_plane planes[MAX_MB_PLANE]) {
+  lf_data->frame_buffer = frame_buffer;
+  lf_data->cm = cm;
+  lf_data->start = 0;
+  lf_data->stop = 0;
+  lf_data->y_only = 0;
+  memcpy(lf_data->planes, planes, sizeof(lf_data->planes));
+}
+
+int vp10_loop_filter_worker(LFWorkerData *const lf_data, void *unused) {
+  (void)unused;
+  vp10_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+                        lf_data->start, lf_data->stop, lf_data->y_only);
+  return 1;
+}
diff --git a/av1/common/loopfilter.h b/av1/common/loopfilter.h
new file mode 100644
index 0000000..b85ed04
--- /dev/null
+++ b/av1/common/loopfilter.h
@@ -0,0 +1,152 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_LOOPFILTER_H_
+#define VP10_COMMON_LOOPFILTER_H_
+
+#include "aom_ports/mem.h"
+#include "./vpx_config.h"
+
+#include "av1/common/blockd.h"
+#include "av1/common/restoration.h"
+#include "av1/common/seg_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_LOOP_FILTER 63
+#define MAX_SHARPNESS 7
+
+#define SIMD_WIDTH 16
+
+#define MAX_MODE_LF_DELTAS 2
+
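+// LF_PATH_420 uses the 4:2:0 fast path (ss11), LF_PATH_444 the unsubsampled
+// path (ss00), and LF_PATH_SLOW the generic non-420 fallback
+// (vp10_filter_block_plane_non420).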
+enum lf_path {
+  LF_PATH_420,
+  LF_PATH_444,
+  LF_PATH_SLOW,
+};
+
+struct loopfilter {
+  int filter_level;
+
+  int sharpness_level;
+  int last_sharpness_level;
+
+  uint8_t mode_ref_delta_enabled;
+  uint8_t mode_ref_delta_update;
+
+  // 0 = Intra, Last, Last2+Last3(CONFIG_EXT_REFS),
+  // GF, BRF(CONFIG_EXT_REFS), ARF
+  signed char ref_deltas[TOTAL_REFS_PER_FRAME];
+  signed char last_ref_deltas[TOTAL_REFS_PER_FRAME];
+
+  // 0 = ZERO_MV, MV
+  signed char mode_deltas[MAX_MODE_LF_DELTAS];
+  signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
+};
+
+// Need to align this structure so that when it is declared and
+// passed it can be loaded into vector registers.
+typedef struct {
+  DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]);
+  DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]);
+  DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]);
+} loop_filter_thresh;
+
+typedef struct {
+  loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
+  uint8_t lvl[MAX_SEGMENTS][TOTAL_REFS_PER_FRAME][MAX_MODE_LF_DELTAS];
+} loop_filter_info_n;
+
+// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
+// Each 1 bit represents a position in which we want to apply the loop filter.
+// Left_ entries refer to whether we apply a filter on the border to the
+// left of the block. Above_ entries refer to whether or not to apply a
+// filter on the above border. Int_ entries refer to whether or not to
+// apply a filter on the 4x4 edges within the 8x8 block that each bit
+// represents.
+// Since each transform is accompanied by a potentially different type of
+// loop filter there is a different entry in the array for each transform
+// size.
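+// In the 64-bit y masks, bit k corresponds to the 8x8 block at row k / 8,
+// column k % 8 of the 64x64 region; the 16-bit uv masks use 4 bits per row
+// in the same fashion.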
+typedef struct {
+  uint64_t left_y[TX_SIZES];
+  uint64_t above_y[TX_SIZES];
+  uint64_t int_4x4_y;
+  uint16_t left_uv[TX_SIZES];
+  uint16_t above_uv[TX_SIZES];
+  uint16_t left_int_4x4_uv;
+  uint16_t above_int_4x4_uv;
+  uint8_t lfl_y[MAX_MIB_SIZE][MAX_MIB_SIZE];
+  uint8_t lfl_uv[MAX_MIB_SIZE / 2][MAX_MIB_SIZE / 2];
+} LOOP_FILTER_MASK;
+
+/* assorted loopfilter functions which get used elsewhere */
+struct VP10Common;
+struct macroblockd;
+struct VP10LfSyncData;
+
+// This function sets up the bit masks for the entire 64x64 region represented
+// by mi_row, mi_col.
+void vp10_setup_mask(struct VP10Common *const cm, const int mi_row,
+                     const int mi_col, MODE_INFO **mi_8x8,
+                     const int mode_info_stride, LOOP_FILTER_MASK *lfm);
+
+void vp10_filter_block_plane_ss00(struct VP10Common *const cm,
+                                  struct macroblockd_plane *const plane,
+                                  int mi_row, LOOP_FILTER_MASK *lfm);
+
+void vp10_filter_block_plane_ss11(struct VP10Common *const cm,
+                                  struct macroblockd_plane *const plane,
+                                  int mi_row, LOOP_FILTER_MASK *lfm);
+
+void vp10_filter_block_plane_non420(struct VP10Common *cm,
+                                    struct macroblockd_plane *plane,
+                                    MODE_INFO **mi_8x8, int mi_row, int mi_col);
+
+void vp10_loop_filter_init(struct VP10Common *cm);
+
+// Update the loop filter for the current frame.
+// This should be called before vp10_loop_filter_rows();
+// vp10_loop_filter_frame() calls this function directly.
+void vp10_loop_filter_frame_init(struct VP10Common *cm, int default_filt_lvl);
+
+void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct VP10Common *cm,
+                            struct macroblockd *mbd, int filter_level,
+                            int y_only, int partial_frame);
+
+// Apply the loop filter to [start, stop) macro block rows in frame_buffer.
+void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
+                           struct VP10Common *cm,
+                           struct macroblockd_plane planes[MAX_MB_PLANE],
+                           int start, int stop, int y_only);
+
+typedef struct LoopFilterWorkerData {
+  YV12_BUFFER_CONFIG *frame_buffer;
+  struct VP10Common *cm;
+  struct macroblockd_plane planes[MAX_MB_PLANE];
+
+  int start;
+  int stop;
+  int y_only;
+} LFWorkerData;
+
+void vp10_loop_filter_data_reset(
+    LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
+    struct VP10Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]);
+
+// Operates on the rows described by 'lf_data'.
+int vp10_loop_filter_worker(LFWorkerData *const lf_data, void *unused);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_LOOPFILTER_H_
diff --git a/av1/common/mips/dspr2/itrans16_dspr2.c b/av1/common/mips/dspr2/itrans16_dspr2.c
new file mode 100644
index 0000000..c0b9b2a
--- /dev/null
+++ b/av1/common/mips/dspr2/itrans16_dspr2.c
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#include "./vp10_rtcd.h"
+#include "av1/common/common.h"
+#include "av1/common/blockd.h"
+#include "av1/common/idct.h"
+#include "aom_dsp/mips/inv_txfm_dspr2.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_ports/mem.h"
+
+#if HAVE_DSPR2
+void vp10_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, int pitch,
+                                 int tx_type) {
+  int i, j;
+  DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
+  int16_t *outptr = out;
+  int16_t temp_out[16];
+  uint32_t pos = 45;
+
+  /* bit position for extract from acc */
+  __asm__ __volatile__("wrdsp    %[pos],    1    \n\t" : : [pos] "r"(pos));
+
+  switch (tx_type) {
+    case DCT_DCT:  // DCT in both horizontal and vertical
+      idct16_rows_dspr2(input, outptr, 16);
+      idct16_cols_add_blk_dspr2(out, dest, pitch);
+      break;
+    case ADST_DCT:  // ADST in vertical, DCT in horizontal
+      idct16_rows_dspr2(input, outptr, 16);
+
+      outptr = out;
+
+      for (i = 0; i < 16; ++i) {
+        iadst16_dspr2(outptr, temp_out);
+
+        for (j = 0; j < 16; ++j)
+          dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) +
+                                           dest[j * pitch + i]);
+        outptr += 16;
+      }
+      break;
+    case DCT_ADST:  // DCT in vertical, ADST in horizontal
+    {
+      int16_t temp_in[16 * 16];
+
+      for (i = 0; i < 16; ++i) {
+        /* prefetch row */
+        prefetch_load((const uint8_t *)(input + 16));
+
+        iadst16_dspr2(input, outptr);
+        input += 16;
+        outptr += 16;
+      }
+
+      for (i = 0; i < 16; ++i)
+        for (j = 0; j < 16; ++j) temp_in[j * 16 + i] = out[i * 16 + j];
+
+      idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
+    } break;
+    case ADST_ADST:  // ADST in both directions
+    {
+      int16_t temp_in[16];
+
+      for (i = 0; i < 16; ++i) {
+        /* prefetch row */
+        prefetch_load((const uint8_t *)(input + 16));
+
+        iadst16_dspr2(input, outptr);
+        input += 16;
+        outptr += 16;
+      }
+
+      for (i = 0; i < 16; ++i) {
+        for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
+        iadst16_dspr2(temp_in, temp_out);
+        for (j = 0; j < 16; ++j)
+          dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) +
+                                           dest[j * pitch + i]);
+      }
+    } break;
+    default: printf("vp10_short_iht16x16_add_dspr2 : Invalid tx_type\n"); break;
+  }
+}
+#endif  // #if HAVE_DSPR2
diff --git a/av1/common/mips/dspr2/itrans4_dspr2.c b/av1/common/mips/dspr2/itrans4_dspr2.c
new file mode 100644
index 0000000..dcb28c9
--- /dev/null
+++ b/av1/common/mips/dspr2/itrans4_dspr2.c
@@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#include "./vp10_rtcd.h"
+#include "av1/common/common.h"
+#include "av1/common/blockd.h"
+#include "av1/common/idct.h"
+#include "aom_dsp/mips/inv_txfm_dspr2.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_ports/mem.h"
+
+#if HAVE_DSPR2
+void vp10_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
+                              int dest_stride, int tx_type) {
+  int i, j;
+  DECLARE_ALIGNED(32, int16_t, out[4 * 4]);
+  int16_t *outptr = out;
+  int16_t temp_in[4 * 4], temp_out[4];
+  uint32_t pos = 45;
+
+  /* bit position for extract from acc */
+  __asm__ __volatile__("wrdsp      %[pos],     1           \n\t"
+                       :
+                       : [pos] "r"(pos));
+
+  switch (tx_type) {
+    case DCT_DCT:  // DCT in both horizontal and vertical
+      vpx_idct4_rows_dspr2(input, outptr);
+      vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+      break;
+    case ADST_DCT:  // ADST in vertical, DCT in horizontal
+      vpx_idct4_rows_dspr2(input, outptr);
+
+      outptr = out;
+
+      for (i = 0; i < 4; ++i) {
+        iadst4_dspr2(outptr, temp_out);
+
+        for (j = 0; j < 4; ++j)
+          dest[j * dest_stride + i] = clip_pixel(
+              ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]);
+
+        outptr += 4;
+      }
+      break;
+    case DCT_ADST:  // DCT in vertical, ADST in horizontal
+      for (i = 0; i < 4; ++i) {
+        iadst4_dspr2(input, outptr);
+        input += 4;
+        outptr += 4;
+      }
+
+      for (i = 0; i < 4; ++i) {
+        for (j = 0; j < 4; ++j) {
+          temp_in[i * 4 + j] = out[j * 4 + i];
+        }
+      }
+      vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
+      break;
+    case ADST_ADST:  // ADST in both directions
+      for (i = 0; i < 4; ++i) {
+        iadst4_dspr2(input, outptr);
+        input += 4;
+        outptr += 4;
+      }
+
+      for (i = 0; i < 4; ++i) {
+        for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
+        iadst4_dspr2(temp_in, temp_out);
+
+        for (j = 0; j < 4; ++j)
+          dest[j * dest_stride + i] = clip_pixel(
+              ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]);
+      }
+      break;
+    default: printf("vp10_iht4x4_16_add_dspr2 : Invalid tx_type\n"); break;
+  }
+}
+#endif  // #if HAVE_DSPR2
diff --git a/av1/common/mips/dspr2/itrans8_dspr2.c b/av1/common/mips/dspr2/itrans8_dspr2.c
new file mode 100644
index 0000000..761d6f0
--- /dev/null
+++ b/av1/common/mips/dspr2/itrans8_dspr2.c
@@ -0,0 +1,84 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#include "./vp10_rtcd.h"
+#include "av1/common/common.h"
+#include "av1/common/blockd.h"
+#include "aom_dsp/mips/inv_txfm_dspr2.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_ports/mem.h"
+
+#if HAVE_DSPR2
+void vp10_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
+                              int dest_stride, int tx_type) {
+  int i, j;
+  DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
+  int16_t *outptr = out;
+  int16_t temp_in[8 * 8], temp_out[8];
+  uint32_t pos = 45;
+
+  /* bit position for extract from acc */
+  __asm__ __volatile__("wrdsp    %[pos],    1    \n\t" : : [pos] "r"(pos));
+
+  switch (tx_type) {
+    case DCT_DCT:  // DCT in both horizontal and vertical
+      idct8_rows_dspr2(input, outptr, 8);
+      idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+      break;
+    case ADST_DCT:  // ADST in vertical, DCT in horizontal
+      idct8_rows_dspr2(input, outptr, 8);
+
+      for (i = 0; i < 8; ++i) {
+        iadst8_dspr2(&out[i * 8], temp_out);
+
+        for (j = 0; j < 8; ++j)
+          dest[j * dest_stride + i] = clip_pixel(
+              ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]);
+      }
+      break;
+    case DCT_ADST:  // DCT in vertical, ADST in horizontal
+      for (i = 0; i < 8; ++i) {
+        iadst8_dspr2(input, outptr);
+        input += 8;
+        outptr += 8;
+      }
+
+      for (i = 0; i < 8; ++i) {
+        for (j = 0; j < 8; ++j) {
+          temp_in[i * 8 + j] = out[j * 8 + i];
+        }
+      }
+      idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
+      break;
+    case ADST_ADST:  // ADST in both directions
+      for (i = 0; i < 8; ++i) {
+        iadst8_dspr2(input, outptr);
+        input += 8;
+        outptr += 8;
+      }
+
+      for (i = 0; i < 8; ++i) {
+        for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
+
+        iadst8_dspr2(temp_in, temp_out);
+
+        for (j = 0; j < 8; ++j)
+          dest[j * dest_stride + i] = clip_pixel(
+              ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]);
+      }
+      break;
+    default: printf("vp10_iht8x8_64_add_dspr2 : Invalid tx_type\n"); break;
+  }
+}
+#endif  // #if HAVE_DSPR2
diff --git a/av1/common/mips/msa/idct16x16_msa.c b/av1/common/mips/msa/idct16x16_msa.c
new file mode 100644
index 0000000..baa3a97
--- /dev/null
+++ b/av1/common/mips/msa/idct16x16_msa.c
@@ -0,0 +1,79 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/enums.h"
+#include "aom_dsp/mips/inv_txfm_msa.h"
+
+void vp10_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst,
+                               int32_t dst_stride, int32_t tx_type) {
+  int32_t i;
+  DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
+  int16_t *out_ptr = &out[0];
+
+  switch (tx_type) {
+    case DCT_DCT:
+      /* transform rows */
+      for (i = 0; i < 2; ++i) {
+        /* process 16 * 8 block */
+        vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+      }
+
+      /* transform columns */
+      for (i = 0; i < 2; ++i) {
+        /* process 8 * 16 block */
+        vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+                                         dst_stride);
+      }
+      break;
+    case ADST_DCT:
+      /* transform rows */
+      for (i = 0; i < 2; ++i) {
+        /* process 16 * 8 block */
+        vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+      }
+
+      /* transform columns */
+      for (i = 0; i < 2; ++i) {
+        vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
+                                          (dst + (i << 3)), dst_stride);
+      }
+      break;
+    case DCT_ADST:
+      /* transform rows */
+      for (i = 0; i < 2; ++i) {
+        /* process 16 * 8 block */
+        vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+      }
+
+      /* transform columns */
+      for (i = 0; i < 2; ++i) {
+        /* process 8 * 16 block */
+        vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+                                         dst_stride);
+      }
+      break;
+    case ADST_ADST:
+      /* transform rows */
+      for (i = 0; i < 2; ++i) {
+        /* process 16 * 8 block */
+        vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+      }
+
+      /* transform columns */
+      for (i = 0; i < 2; ++i) {
+        vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
+                                          (dst + (i << 3)), dst_stride);
+      }
+      break;
+    default: assert(0); break;
+  }
+}
diff --git a/av1/common/mips/msa/idct4x4_msa.c b/av1/common/mips/msa/idct4x4_msa.c
new file mode 100644
index 0000000..0620df7
--- /dev/null
+++ b/av1/common/mips/msa/idct4x4_msa.c
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/enums.h"
+#include "aom_dsp/mips/inv_txfm_msa.h"
+
+void vp10_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
+                            int32_t dst_stride, int32_t tx_type) {
+  v8i16 in0, in1, in2, in3;
+
+  /* load vector elements of 4x4 block */
+  LD4x4_SH(input, in0, in1, in2, in3);
+  TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+
+  switch (tx_type) {
+    case DCT_DCT:
+      /* DCT in horizontal */
+      VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+      /* DCT in vertical */
+      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+      VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+      break;
+    case ADST_DCT:
+      /* DCT in horizontal */
+      VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+      /* ADST in vertical */
+      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+      VPX_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+      break;
+    case DCT_ADST:
+      /* ADST in horizontal */
+      VPX_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+      /* DCT in vertical */
+      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+      VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+      break;
+    case ADST_ADST:
+      /* ADST in horizontal */
+      VPX_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+      /* ADST in vertical */
+      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+      VPX_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+      break;
+    default: assert(0); break;
+  }
+
+  /* final rounding: add 2^3, then shift right by 4 (divide by 2^4) */
+  SRARI_H4_SH(in0, in1, in2, in3, 4);
+  /* add block and store 4x4 */
+  ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
+}
diff --git a/av1/common/mips/msa/idct8x8_msa.c b/av1/common/mips/msa/idct8x8_msa.c
new file mode 100644
index 0000000..5c62c4a
--- /dev/null
+++ b/av1/common/mips/msa/idct8x8_msa.c
@@ -0,0 +1,78 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/enums.h"
+#include "aom_dsp/mips/inv_txfm_msa.h"
+
+void vp10_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
+                            int32_t dst_stride, int32_t tx_type) {
+  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+
+  /* load vector elements of 8x8 block */
+  LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+
+  TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+                     in4, in5, in6, in7);
+
+  switch (tx_type) {
+    case DCT_DCT:
+      /* DCT in horizontal */
+      VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+                     in4, in5, in6, in7);
+      /* DCT in vertical */
+      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+                         in3, in4, in5, in6, in7);
+      VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+                     in4, in5, in6, in7);
+      break;
+    case ADST_DCT:
+      /* DCT in horizontal */
+      VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+                     in4, in5, in6, in7);
+      /* ADST in vertical */
+      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+                         in3, in4, in5, in6, in7);
+      VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+                in5, in6, in7);
+      break;
+    case DCT_ADST:
+      /* ADST in horizontal */
+      VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+                in5, in6, in7);
+      /* DCT in vertical */
+      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+                         in3, in4, in5, in6, in7);
+      VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+                     in4, in5, in6, in7);
+      break;
+    case ADST_ADST:
+      /* ADST in horizontal */
+      VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+                in5, in6, in7);
+      /* ADST in vertical */
+      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+                         in3, in4, in5, in6, in7);
+      VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+                in5, in6, in7);
+      break;
+    default: assert(0); break;
+  }
+
+  /* final rounding: add 2^4, then shift right by 5 (divide by 2^5) */
+  SRARI_H4_SH(in0, in1, in2, in3, 5);
+  SRARI_H4_SH(in4, in5, in6, in7, 5);
+
+  /* add block and store 8x8 */
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
+  dst += (4 * dst_stride);
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
+}
diff --git a/av1/common/mv.h b/av1/common/mv.h
new file mode 100644
index 0000000..dba3336
--- /dev/null
+++ b/av1/common/mv.h
@@ -0,0 +1,149 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_MV_H_
+#define VP10_COMMON_MV_H_
+
+#include "av1/common/common.h"
+#include "aom_dsp/vpx_filter.h"
+#if CONFIG_GLOBAL_MOTION
+#include "av1/common/warped_motion.h"
+#endif  // CONFIG_GLOBAL_MOTION
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct mv {
+  int16_t row;
+  int16_t col;
+} MV;
+
+typedef union int_mv {
+  uint32_t as_int;
+  MV as_mv;
+} int_mv; /* facilitates faster equality tests and copies */
+
+typedef struct mv32 {
+  int32_t row;
+  int32_t col;
+} MV32;
+
+#if CONFIG_GLOBAL_MOTION
+// ALPHA here refers to parameters a and b in rotzoom model:
+// | a   b|
+// |-b   a|
+//
+// and a, b, c, d in affine model:
+// | a   b|
+// | c   d|
+//
+// Anything ending in PREC_BITS is the number of bits of precision
+// to maintain when converting from double to integer.
+//
+// The ABS parameters are used to create an upper and lower bound
+// for each parameter. In other words, after a parameter is integerized
+// it is clamped between -(1 << ABS_XXX_BITS) and (1 << ABS_XXX_BITS).
+//
+// XXX_PREC_DIFF and XXX_DECODE_FACTOR are computed once here to avoid
+// repeating the computation on the decoder side. They allow the global
+// motion parameters to be encoded at a lower precision than the warped
+// model precision, so the parameters must be converted back to warped
+// precision when they are decoded (see the example below the defines).
+//
+// XX_MIN, XX_MAX are also computed to avoid repeated computation
+
+#define GM_TRANS_PREC_BITS 5
+#define GM_TRANS_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_TRANS_PREC_BITS)
+#define GM_TRANS_DECODE_FACTOR (1 << GM_TRANS_PREC_DIFF)
+
+#define GM_ALPHA_PREC_BITS 5
+#define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS)
+#define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF)
+
+#define GM_ABS_ALPHA_BITS 8
+#define GM_ABS_TRANS_BITS 8
+
+#define GM_TRANS_MAX (1 << GM_ABS_TRANS_BITS)
+#define GM_ALPHA_MAX (1 << GM_ABS_ALPHA_BITS)
+#define GM_TRANS_MIN (-GM_TRANS_MAX)
+#define GM_ALPHA_MIN (-GM_ALPHA_MAX)
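+
+// For example (illustrative only): a translation component coded at
+// GM_TRANS_PREC_BITS precision is restored to warped-model precision on
+// decode with a single multiply,
+//   int32_t wm_trans = coded_trans * GM_TRANS_DECODE_FACTOR;
+// which is equivalent to coded_trans << GM_TRANS_PREC_DIFF.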
+
+typedef enum {
+  GLOBAL_ZERO = 0,
+  GLOBAL_TRANSLATION = 1,
+  GLOBAL_ROTZOOM = 2,
+  GLOBAL_AFFINE = 3,
+  GLOBAL_MOTION_TYPES
+} GLOBAL_MOTION_TYPE;
+
+typedef struct {
+  GLOBAL_MOTION_TYPE gmtype;
+  WarpedMotionParams motion_params;
+} Global_Motion_Params;
+
+static INLINE TransformationType gm_to_trans_type(GLOBAL_MOTION_TYPE gmtype) {
+  switch (gmtype) {
+    case GLOBAL_ZERO: return UNKNOWN_TRANSFORM;
+    case GLOBAL_TRANSLATION: return TRANSLATION;
+    case GLOBAL_ROTZOOM: return ROTZOOM;
+    case GLOBAL_AFFINE: return AFFINE;
+    default: assert(0);
+  }
+  return UNKNOWN_TRANSFORM;
+}
+
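+// Note (inferred from the checks below): wmmat[0..1] hold the translation
+// terms, wmmat[2..3] the rotzoom terms, and wmmat[4..5] the affine-only
+// terms of the warped-model matrix.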
+static INLINE GLOBAL_MOTION_TYPE get_gmtype(const Global_Motion_Params *gm) {
+  if (gm->motion_params.wmmat[4] == 0 && gm->motion_params.wmmat[5] == 0) {
+    if (gm->motion_params.wmmat[2] == 0 && gm->motion_params.wmmat[3] == 0) {
+      return ((gm->motion_params.wmmat[0] | gm->motion_params.wmmat[1])
+                  ? GLOBAL_TRANSLATION
+                  : GLOBAL_ZERO);
+    } else {
+      return GLOBAL_ROTZOOM;
+    }
+  } else {
+    return GLOBAL_AFFINE;
+  }
+}
+#endif  // CONFIG_GLOBAL_MOTION
+
+#if CONFIG_REF_MV
+typedef struct candidate_mv {
+  int_mv this_mv;
+  int_mv comp_mv;
+  int_mv pred_mv;
+  int weight;
+} CANDIDATE_MV;
+#endif
+
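+// The two helpers below compare MVs as packed 32-bit words; they rely on MV
+// being two contiguous int16_t fields, mirroring the int_mv union above.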
+static INLINE int is_zero_mv(const MV *mv) {
+  return *((const uint32_t *)mv) == 0;
+}
+
+static INLINE int is_equal_mv(const MV *a, const MV *b) {
+  return *((const uint32_t *)a) == *((const uint32_t *)b);
+}
+
+static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row,
+                            int max_row) {
+  mv->col = clamp(mv->col, min_col, max_col);
+  mv->row = clamp(mv->row, min_row, max_row);
+}
+
+static INLINE int mv_has_subpel(const MV *mv) {
+  return (mv->row & SUBPEL_MASK) || (mv->col & SUBPEL_MASK);
+}
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_MV_H_
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
new file mode 100644
index 0000000..836b065
--- /dev/null
+++ b/av1/common/mvref_common.c
@@ -0,0 +1,828 @@
+
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/mvref_common.h"
+
+#if CONFIG_REF_MV
+
+static uint8_t add_ref_mv_candidate(
+    const MODE_INFO *const candidate_mi, const MB_MODE_INFO *const candidate,
+    const MV_REFERENCE_FRAME rf[2], uint8_t *refmv_count,
+    CANDIDATE_MV *ref_mv_stack, const int use_hp, int len, int block, int col) {
+  const int weight = len;
+  int index = 0, ref;
+  int newmv_count = 0;
+
+  assert(2 * weight < REF_CAT_LEVEL);
+
+  if (rf[1] == NONE) {
+    // single reference frame
+    for (ref = 0; ref < 2; ++ref) {
+      if (candidate->ref_frame[ref] == rf[0]) {
+        int_mv this_refmv = get_sub_block_mv(candidate_mi, ref, col, block);
+        lower_mv_precision(&this_refmv.as_mv, use_hp);
+
+        for (index = 0; index < *refmv_count; ++index)
+          if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break;
+
+        if (index < *refmv_count) ref_mv_stack[index].weight += 2 * weight;
+
+        // Add a new item to the list.
+        if (index == *refmv_count) {
+          ref_mv_stack[index].this_mv = this_refmv;
+          ref_mv_stack[index].pred_mv =
+              get_sub_block_pred_mv(candidate_mi, ref, col, block);
+          ref_mv_stack[index].weight = 2 * weight;
+          ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+          if (candidate->mode == NEWMV || candidate->mode == NEWFROMNEARMV)
+#else
+          if (candidate->mode == NEWMV)
+#endif  // CONFIG_EXT_INTER
+            ++newmv_count;
+        }
+
+        if (candidate_mi->mbmi.sb_type < BLOCK_8X8 && block >= 0) {
+          int alt_block = 3 - block;
+          this_refmv = get_sub_block_mv(candidate_mi, ref, col, alt_block);
+          lower_mv_precision(&this_refmv.as_mv, use_hp);
+
+          for (index = 0; index < *refmv_count; ++index)
+            if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break;
+
+          if (index < *refmv_count) ref_mv_stack[index].weight += weight;
+
+          // Add a new item to the list.
+          if (index == *refmv_count) {
+            ref_mv_stack[index].this_mv = this_refmv;
+            ref_mv_stack[index].pred_mv =
+                get_sub_block_pred_mv(candidate_mi, ref, col, alt_block);
+            ref_mv_stack[index].weight = weight;
+            ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+            if (candidate->mode == NEWMV || candidate->mode == NEWFROMNEARMV)
+#else
+            if (candidate->mode == NEWMV)
+#endif  // CONFIG_EXT_INTER
+              ++newmv_count;
+          }
+        }
+      }
+    }
+  } else {
+    // compound reference frame
+    if (candidate->ref_frame[0] == rf[0] && candidate->ref_frame[1] == rf[1]) {
+      int_mv this_refmv[2];
+
+      for (ref = 0; ref < 2; ++ref) {
+        this_refmv[ref] = get_sub_block_mv(candidate_mi, ref, col, block);
+        lower_mv_precision(&this_refmv[ref].as_mv, use_hp);
+      }
+
+      for (index = 0; index < *refmv_count; ++index)
+        if ((ref_mv_stack[index].this_mv.as_int == this_refmv[0].as_int) &&
+            (ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int))
+          break;
+
+      if (index < *refmv_count) ref_mv_stack[index].weight += 2 * weight;
+
+      // Add a new item to the list.
+      if (index == *refmv_count) {
+        ref_mv_stack[index].this_mv = this_refmv[0];
+        ref_mv_stack[index].comp_mv = this_refmv[1];
+        ref_mv_stack[index].weight = 2 * weight;
+        ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+        if (candidate->mode == NEW_NEWMV)
+#else
+        if (candidate->mode == NEWMV)
+#endif  // CONFIG_EXT_INTER
+          ++newmv_count;
+      }
+
+      if (candidate_mi->mbmi.sb_type < BLOCK_8X8 && block >= 0) {
+        int alt_block = 3 - block;
+        this_refmv[0] = get_sub_block_mv(candidate_mi, 0, col, alt_block);
+        this_refmv[1] = get_sub_block_mv(candidate_mi, 1, col, alt_block);
+
+        for (ref = 0; ref < 2; ++ref)
+          lower_mv_precision(&this_refmv[ref].as_mv, use_hp);
+
+        for (index = 0; index < *refmv_count; ++index)
+          if (ref_mv_stack[index].this_mv.as_int == this_refmv[0].as_int &&
+              ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int)
+            break;
+
+        if (index < *refmv_count) ref_mv_stack[index].weight += weight;
+
+        // Add a new item to the list.
+        if (index == *refmv_count) {
+          ref_mv_stack[index].this_mv = this_refmv[0];
+          ref_mv_stack[index].comp_mv = this_refmv[1];
+          ref_mv_stack[index].weight = weight;
+          ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+          if (candidate->mode == NEW_NEWMV)
+#else
+          if (candidate->mode == NEWMV)
+#endif  // CONFIG_EXT_INTER
+            ++newmv_count;
+        }
+      }
+    }
+  }
+  return newmv_count;
+}
+
+static uint8_t scan_row_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+                             const int mi_row, const int mi_col, int block,
+                             const MV_REFERENCE_FRAME rf[2], int row_offset,
+                             CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count) {
+  const TileInfo *const tile = &xd->tile;
+  int i;
+  uint8_t newmv_count = 0;
+
+  for (i = 0; i < xd->n8_w && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
+    POSITION mi_pos;
+    mi_pos.row = row_offset;
+    mi_pos.col = i;
+
+    if (is_inside(tile, mi_col, mi_row, &mi_pos)) {
+      const MODE_INFO *const candidate_mi =
+          xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
+      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+      const int len =
+          VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[candidate->sb_type]);
+
+      newmv_count += add_ref_mv_candidate(
+          candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
+          cm->allow_high_precision_mv, len, block, mi_pos.col);
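+      // Step over the candidate's full width so each neighbour is visited
+      // only once.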
+      i += len;
+    } else {
+      ++i;
+    }
+  }
+
+  return newmv_count;
+}
+
+static uint8_t scan_col_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+                             const int mi_row, const int mi_col, int block,
+                             const MV_REFERENCE_FRAME rf[2], int col_offset,
+                             CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count) {
+  const TileInfo *const tile = &xd->tile;
+  int i;
+  uint8_t newmv_count = 0;
+
+  for (i = 0; i < xd->n8_h && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
+    POSITION mi_pos;
+    mi_pos.row = i;
+    mi_pos.col = col_offset;
+
+    if (is_inside(tile, mi_col, mi_row, &mi_pos)) {
+      const MODE_INFO *const candidate_mi =
+          xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
+      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+      const int len =
+          VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[candidate->sb_type]);
+
+      newmv_count += add_ref_mv_candidate(
+          candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
+          cm->allow_high_precision_mv, len, block, mi_pos.col);
+      i += len;
+    } else {
+      ++i;
+    }
+  }
+
+  return newmv_count;
+}
+
+static uint8_t scan_blk_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+                             const int mi_row, const int mi_col, int block,
+                             const MV_REFERENCE_FRAME rf[2], int row_offset,
+                             int col_offset, CANDIDATE_MV *ref_mv_stack,
+                             uint8_t *refmv_count) {
+  const TileInfo *const tile = &xd->tile;
+  POSITION mi_pos;
+  uint8_t newmv_count = 0;
+
+  mi_pos.row = row_offset;
+  mi_pos.col = col_offset;
+
+  if (is_inside(tile, mi_col, mi_row, &mi_pos) &&
+      *refmv_count < MAX_REF_MV_STACK_SIZE) {
+    const MODE_INFO *const candidate_mi =
+        xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
+    const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+    const int len = 1;
+
+    newmv_count += add_ref_mv_candidate(
+        candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
+        cm->allow_high_precision_mv, len, block, mi_pos.col);
+  }  // Only a single 8x8 block's motion information is analyzed.
+  return newmv_count;
+}
+
+static int has_top_right(const MACROBLOCKD *xd, int mi_row, int mi_col,
+                         int bs) {
+  // In a split partition, all blocks apart from the bottom-right one have a
+  // top right.
+  int has_tr = !((mi_row & bs) && (mi_col & bs));
+
+  // bs > 0 and bs is a power of 2
+  assert(bs > 0 && !(bs & (bs - 1)));
+
+  // For each 4x4 group of blocks, when the bottom-right block is decoded the
+  // blocks to its right have not yet been decoded, so the bottom-right block
+  // does not have a top right.
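+  // Illustrative case: with bs == 1, an 8x8 block at odd (mi_row, mi_col)
+  // is the bottom right of its group and therefore starts without a top
+  // right; the loop below applies the same rule to larger group sizes.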
+  while (bs < MAX_MIB_SIZE) {
+    if (mi_col & bs) {
+      if ((mi_col & (2 * bs)) && (mi_row & (2 * bs))) {
+        has_tr = 0;
+        break;
+      }
+    } else {
+      break;
+    }
+    bs <<= 1;
+  }
+
+  // The left-hand rectangle of two vertical rectangles always has a top right
+  // (as the block above will have been decoded).
+  if (xd->n8_w < xd->n8_h)
+    if (!xd->is_sec_rect) has_tr = 1;
+
+  // The bottom rectangle of two horizontal rectangles never has a top right
+  // (as the block to its right won't have been decoded).
+  if (xd->n8_w > xd->n8_h)
+    if (xd->is_sec_rect) has_tr = 0;
+
+#if CONFIG_EXT_PARTITION_TYPES
+  // The bottom-left square of a Vertical A partition does not have a top
+  // right, as it is decoded before the right-hand rectangle of the partition.
+  if (xd->mi[0]->mbmi.partition == PARTITION_VERT_A)
+    if ((mi_row & bs) && !(mi_col & bs)) has_tr = 0;
+#endif  // CONFIG_EXT_PARTITION_TYPES
+
+  return has_tr;
+}
+
+static void handle_sec_rect_block(const MB_MODE_INFO *const candidate,
+                                  uint8_t refmv_count,
+                                  CANDIDATE_MV *ref_mv_stack,
+                                  MV_REFERENCE_FRAME ref_frame,
+                                  int16_t *mode_context) {
+  int rf, idx;
+
+  for (rf = 0; rf < 2; ++rf) {
+    if (candidate->ref_frame[rf] == ref_frame) {
+      const int list_range = VPXMIN(refmv_count, MAX_MV_REF_CANDIDATES);
+
+      const int_mv pred_mv = candidate->mv[rf];
+      for (idx = 0; idx < list_range; ++idx)
+        if (pred_mv.as_int == ref_mv_stack[idx].this_mv.as_int) break;
+
+      if (idx < list_range) {
+        if (idx == 0)
+          mode_context[ref_frame] |= (1 << SKIP_NEARESTMV_OFFSET);
+        else if (idx == 1)
+          mode_context[ref_frame] |= (1 << SKIP_NEARMV_OFFSET);
+      }
+    }
+  }
+}
+
+static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+                              MV_REFERENCE_FRAME ref_frame,
+                              uint8_t *refmv_count, CANDIDATE_MV *ref_mv_stack,
+                              int_mv *mv_ref_list, int block, int mi_row,
+                              int mi_col, int16_t *mode_context) {
+  int idx, nearest_refmv_count = 0;
+  uint8_t newmv_count = 0;
+
+  CANDIDATE_MV tmp_mv;
+  int len, nr_len;
+
+  const MV_REF *const prev_frame_mvs_base =
+      cm->use_prev_frame_mvs
+          ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col
+          : NULL;
+
+  int bs = VPXMAX(xd->n8_w, xd->n8_h);
+  int has_tr = has_top_right(xd, mi_row, mi_col, bs);
+
+  MV_REFERENCE_FRAME rf[2];
+  vp10_set_ref_frame(rf, ref_frame);
+
+  mode_context[ref_frame] = 0;
+  *refmv_count = 0;
+
+  // Scan the first above row mode info.
+  newmv_count = scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -1,
+                              ref_mv_stack, refmv_count);
+  // Scan the first left column mode info.
+  newmv_count += scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -1,
+                               ref_mv_stack, refmv_count);
+
+  // Check top-right boundary
+  if (has_tr)
+    newmv_count += scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, 1,
+                                 ref_mv_stack, refmv_count);
+
+  nearest_refmv_count = *refmv_count;
+
+  for (idx = 0; idx < nearest_refmv_count; ++idx) {
+    assert(ref_mv_stack[idx].weight > 0 &&
+           ref_mv_stack[idx].weight < REF_CAT_LEVEL);
+    ref_mv_stack[idx].weight += REF_CAT_LEVEL;
+  }
+
+  if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame &&
+      rf[1] == NONE) {
+    int ref;
+    int blk_row, blk_col;
+
+    for (blk_row = 0; blk_row < xd->n8_h; ++blk_row) {
+      for (blk_col = 0; blk_col < xd->n8_w; ++blk_col) {
+        const MV_REF *prev_frame_mvs =
+            prev_frame_mvs_base + blk_row * cm->mi_cols + blk_col;
+
+        POSITION mi_pos;
+        mi_pos.row = blk_row;
+        mi_pos.col = blk_col;
+
+        if (!is_inside(&xd->tile, mi_col, mi_row, &mi_pos)) continue;
+
+        for (ref = 0; ref < 2; ++ref) {
+          if (prev_frame_mvs->ref_frame[ref] == ref_frame) {
+            int_mv this_refmv = prev_frame_mvs->mv[ref];
+            lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv);
+
+            for (idx = 0; idx < *refmv_count; ++idx)
+              if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) break;
+
+            if (idx < *refmv_count) ref_mv_stack[idx].weight += 2;
+
+            if (idx == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
+              ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
+              ref_mv_stack[idx].weight = 2;
+              ++(*refmv_count);
+
+              if (abs(ref_mv_stack[idx].this_mv.as_mv.row) >= 8 ||
+                  abs(ref_mv_stack[idx].this_mv.as_mv.col) >= 8)
+                mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (*refmv_count == nearest_refmv_count)
+    mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+
+  // Analyze the top-left corner block mode info.
+  //  scan_blk_mbmi(cm, xd, mi_row, mi_col, block, ref_frame,
+  //                -1, -1, ref_mv_stack, refmv_count);
+
+  // Scan the second outer area.
+  scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -2, ref_mv_stack,
+                refmv_count);
+  scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -2, ref_mv_stack,
+                refmv_count);
+
+  // Scan the third outer area.
+  scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -3, ref_mv_stack,
+                refmv_count);
+  scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -3, ref_mv_stack,
+                refmv_count);
+
+  // Scan the fourth outer area.
+  scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -4, ref_mv_stack,
+                refmv_count);
+  // Scan the fourth left column mode info.
+  scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -4, ref_mv_stack,
+                refmv_count);
+
+  switch (nearest_refmv_count) {
+    case 0:
+      mode_context[ref_frame] |= 0;
+      if (*refmv_count >= 1) mode_context[ref_frame] |= 1;
+
+      if (*refmv_count == 1)
+        mode_context[ref_frame] |= (1 << REFMV_OFFSET);
+      else if (*refmv_count >= 2)
+        mode_context[ref_frame] |= (2 << REFMV_OFFSET);
+      break;
+    case 1:
+      mode_context[ref_frame] |= (newmv_count > 0) ? 2 : 3;
+
+      if (*refmv_count == 1)
+        mode_context[ref_frame] |= (3 << REFMV_OFFSET);
+      else if (*refmv_count >= 2)
+        mode_context[ref_frame] |= (4 << REFMV_OFFSET);
+      break;
+
+    case 2:
+    default:
+      if (newmv_count >= 2)
+        mode_context[ref_frame] |= 4;
+      else if (newmv_count == 1)
+        mode_context[ref_frame] |= 5;
+      else
+        mode_context[ref_frame] |= 6;
+
+      mode_context[ref_frame] |= (5 << REFMV_OFFSET);
+      break;
+  }
+
+  // Rank the likelihood and assign nearest and near mvs.
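+  // The two passes below are bubble sorts on the candidate weights
+  // (descending); nr_len records the last swap so sorted tails are skipped.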
+  len = nearest_refmv_count;
+  while (len > 0) {
+    nr_len = 0;
+    for (idx = 1; idx < len; ++idx) {
+      if (ref_mv_stack[idx - 1].weight < ref_mv_stack[idx].weight) {
+        tmp_mv = ref_mv_stack[idx - 1];
+        ref_mv_stack[idx - 1] = ref_mv_stack[idx];
+        ref_mv_stack[idx] = tmp_mv;
+        nr_len = idx;
+      }
+    }
+    len = nr_len;
+  }
+
+  len = *refmv_count;
+  while (len > nearest_refmv_count) {
+    nr_len = nearest_refmv_count;
+    for (idx = nearest_refmv_count + 1; idx < len; ++idx) {
+      if (ref_mv_stack[idx - 1].weight < ref_mv_stack[idx].weight) {
+        tmp_mv = ref_mv_stack[idx - 1];
+        ref_mv_stack[idx - 1] = ref_mv_stack[idx];
+        ref_mv_stack[idx] = tmp_mv;
+        nr_len = idx;
+      }
+    }
+    len = nr_len;
+  }
+
+  // TODO(jingning): Clean-up needed.
+  if (xd->is_sec_rect) {
+    if (xd->n8_w < xd->n8_h) {
+      const MODE_INFO *const candidate_mi = xd->mi[-1];
+      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+      handle_sec_rect_block(candidate, nearest_refmv_count, ref_mv_stack,
+                            ref_frame, mode_context);
+    }
+
+    if (xd->n8_w > xd->n8_h) {
+      const MODE_INFO *const candidate_mi = xd->mi[-xd->mi_stride];
+      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+      handle_sec_rect_block(candidate, nearest_refmv_count, ref_mv_stack,
+                            ref_frame, mode_context);
+    }
+  }
+
+  if (rf[1] > NONE) {
+    for (idx = 0; idx < *refmv_count; ++idx) {
+      clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv, xd->n8_w << 3,
+                   xd->n8_h << 3, xd);
+      clamp_mv_ref(&ref_mv_stack[idx].comp_mv.as_mv, xd->n8_w << 3,
+                   xd->n8_h << 3, xd);
+    }
+  } else {
+    for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *refmv_count); ++idx) {
+      mv_ref_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
+      clamp_mv_ref(&mv_ref_list[idx].as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+    }
+  }
+}
+#endif
+
+// This function searches the neighbourhood of a given MB/SB
+// to try and find candidate reference vectors.
+static void find_mv_refs_idx(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+                             MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+                             int_mv *mv_ref_list, int block, int mi_row,
+                             int mi_col, find_mv_refs_sync sync,
+                             void *const data, int16_t *mode_context) {
+  const int *ref_sign_bias = cm->ref_frame_sign_bias;
+  int i, refmv_count = 0;
+  const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
+  int different_ref_found = 0;
+  int context_counter = 0;
+  const MV_REF *const prev_frame_mvs =
+      cm->use_prev_frame_mvs
+          ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col
+          : NULL;
+  const TileInfo *const tile = &xd->tile;
+  const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type] << 3;
+  const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type] << 3;
+
+  // The nearest 2 blocks are treated differently: if the size < 8x8 we get
+  // the mv from the bmi substructure, and we also need to keep a mode count.
+  for (i = 0; i < 2; ++i) {
+    const POSITION *const mv_ref = &mv_ref_search[i];
+    if (is_inside(tile, mi_col, mi_row, mv_ref)) {
+      const MODE_INFO *const candidate_mi =
+          xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
+      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+      // Keep counts for entropy encoding.
+      context_counter += mode_2_counter[candidate->mode];
+      different_ref_found = 1;
+
+      if (candidate->ref_frame[0] == ref_frame)
+        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block),
+                        refmv_count, mv_ref_list, bw, bh, xd, Done);
+      else if (candidate->ref_frame[1] == ref_frame)
+        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block),
+                        refmv_count, mv_ref_list, bw, bh, xd, Done);
+    }
+  }
+
+  // Check the rest of the neighbors in much the same way
+  // as before except we don't need to keep track of sub blocks or
+  // mode counts.
+  for (; i < MVREF_NEIGHBOURS; ++i) {
+    const POSITION *const mv_ref = &mv_ref_search[i];
+    if (is_inside(tile, mi_col, mi_row, mv_ref)) {
+      const MB_MODE_INFO *const candidate =
+          &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
+      different_ref_found = 1;
+
+      if (candidate->ref_frame[0] == ref_frame)
+        ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, bw, bh, xd,
+                        Done);
+      else if (candidate->ref_frame[1] == ref_frame)
+        ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list, bw, bh, xd,
+                        Done);
+    }
+  }
+
+// TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
+// on the Windows platform. The sync here is unnecessary if use_prev_frame_mvs
+// is 0. But after removing it, the unit test will hang on Windows
+// due to several threads waiting for a thread's signal.
+#if defined(_WIN32) && !HAVE_PTHREAD_H
+  if (cm->frame_parallel_decode && sync != NULL) {
+    sync(data, mi_row);
+  }
+#endif
+
+  // Check the last frame's mode and mv info.
+  if (cm->use_prev_frame_mvs) {
+    // Synchronize here for frame parallel decode if sync function is provided.
+    if (cm->frame_parallel_decode && sync != NULL) {
+      sync(data, mi_row);
+    }
+
+    if (prev_frame_mvs->ref_frame[0] == ref_frame) {
+      ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, bw, bh,
+                      xd, Done);
+    } else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
+      ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, bw, bh,
+                      xd, Done);
+    }
+  }
+
+  // Since we couldn't find 2 mvs from the same reference frame, go back
+  // through the neighbors and find motion vectors from different reference
+  // frames.
+  if (different_ref_found) {
+    for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
+      const POSITION *mv_ref = &mv_ref_search[i];
+      if (is_inside(tile, mi_col, mi_row, mv_ref)) {
+        const MB_MODE_INFO *const candidate =
+            &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
+
+        // If the candidate is INTRA we don't want to consider its mv.
+        IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
+                                 refmv_count, mv_ref_list, bw, bh, xd, Done);
+      }
+    }
+  }
+
+  // Since we still don't have a candidate, we'll try the last frame.
+  if (cm->use_prev_frame_mvs) {
+    if (prev_frame_mvs->ref_frame[0] != ref_frame &&
+        prev_frame_mvs->ref_frame[0] > INTRA_FRAME) {
+      int_mv mv = prev_frame_mvs->mv[0];
+      if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] !=
+          ref_sign_bias[ref_frame]) {
+        mv.as_mv.row *= -1;
+        mv.as_mv.col *= -1;
+      }
+      ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done);
+    }
+
+    if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
+        prev_frame_mvs->ref_frame[1] != ref_frame) {
+      int_mv mv = prev_frame_mvs->mv[1];
+      if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] !=
+          ref_sign_bias[ref_frame]) {
+        mv.as_mv.row *= -1;
+        mv.as_mv.col *= -1;
+      }
+      ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done);
+    }
+  }
+
+Done:
+  if (mode_context)
+    mode_context[ref_frame] = counter_to_context[context_counter];
+  for (i = refmv_count; i < MAX_MV_REF_CANDIDATES; ++i)
+    mv_ref_list[i].as_int = 0;
+}
+
+#if CONFIG_EXT_INTER
+// This function keeps a mode count for a given MB/SB
+void vp10_update_mv_context(const MACROBLOCKD *xd, MODE_INFO *mi,
+                            MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list,
+                            int block, int mi_row, int mi_col,
+                            int16_t *mode_context) {
+  int i, refmv_count = 0;
+  const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
+  int context_counter = 0;
+  const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type] << 3;
+  const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type] << 3;
+  const TileInfo *const tile = &xd->tile;
+
+  // Blank the reference vector list
+  memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
+
+  // Only the nearest 2 blocks are examined.
+  // If the size < 8x8, we get the mv from the bmi substructure.
+  for (i = 0; i < 2; ++i) {
+    const POSITION *const mv_ref = &mv_ref_search[i];
+    if (is_inside(tile, mi_col, mi_row, mv_ref)) {
+      const MODE_INFO *const candidate_mi =
+          xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
+      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+
+      // Keep counts for entropy encoding.
+      context_counter += mode_2_counter[candidate->mode];
+
+      if (candidate->ref_frame[0] == ref_frame) {
+        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block),
+                        refmv_count, mv_ref_list, bw, bh, xd, Done);
+      } else if (candidate->ref_frame[1] == ref_frame) {
+        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block),
+                        refmv_count, mv_ref_list, bw, bh, xd, Done);
+      }
+    }
+  }
+
+Done:
+
+  if (mode_context)
+    mode_context[ref_frame] = counter_to_context[context_counter];
+}
+#endif  // CONFIG_EXT_INTER
+
+void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+                       MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+#if CONFIG_REF_MV
+                       uint8_t *ref_mv_count, CANDIDATE_MV *ref_mv_stack,
+#if CONFIG_EXT_INTER
+                       int16_t *compound_mode_context,
+#endif  // CONFIG_EXT_INTER
+#endif
+                       int_mv *mv_ref_list, int mi_row, int mi_col,
+                       find_mv_refs_sync sync, void *const data,
+                       int16_t *mode_context) {
+#if CONFIG_REF_MV
+  int idx, all_zero = 1;
+#endif
+#if CONFIG_EXT_INTER
+  vp10_update_mv_context(xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col,
+#if CONFIG_REF_MV
+                         compound_mode_context);
+#else
+                         mode_context);
+#endif  // CONFIG_REF_MV
+  find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col, sync,
+                   data, NULL);
+#else
+  find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col, sync,
+                   data, mode_context);
+#endif  // CONFIG_EXT_INTER
+
+#if CONFIG_REF_MV
+  setup_ref_mv_list(cm, xd, ref_frame, ref_mv_count, ref_mv_stack, mv_ref_list,
+                    -1, mi_row, mi_col, mode_context);
+
+  for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx)
+    if (mv_ref_list[idx].as_int != 0) all_zero = 0;
+
+  if (all_zero) mode_context[ref_frame] |= (1 << ALL_ZERO_FLAG_OFFSET);
+#endif
+}
+
+void vp10_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
+                            int_mv *near_mv) {
+  int i;
+  // Make sure all the candidates are properly clamped etc
+  for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
+    lower_mv_precision(&mvlist[i].as_mv, allow_hp);
+  }
+  *nearest_mv = mvlist[0];
+  *near_mv = mvlist[1];
+}
+
+void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, int block,
+                                    int ref, int mi_row, int mi_col,
+#if CONFIG_REF_MV
+                                    CANDIDATE_MV *ref_mv_stack,
+                                    uint8_t *ref_mv_count,
+#endif
+#if CONFIG_EXT_INTER
+                                    int_mv *mv_list,
+#endif  // CONFIG_EXT_INTER
+                                    int_mv *nearest_mv, int_mv *near_mv) {
+#if !CONFIG_EXT_INTER
+  int_mv mv_list[MAX_MV_REF_CANDIDATES];
+#endif  // !CONFIG_EXT_INTER
+  MODE_INFO *const mi = xd->mi[0];
+  b_mode_info *bmi = mi->bmi;
+  int n;
+#if CONFIG_REF_MV
+  CANDIDATE_MV tmp_mv;
+  uint8_t idx;
+  uint8_t above_count = 0, left_count = 0;
+  MV_REFERENCE_FRAME rf[2] = { mi->mbmi.ref_frame[ref], NONE };
+  *ref_mv_count = 0;
+#endif
+
+  assert(MAX_MV_REF_CANDIDATES == 2);
+
+  find_mv_refs_idx(cm, xd, mi, mi->mbmi.ref_frame[ref], mv_list, block, mi_row,
+                   mi_col, NULL, NULL, NULL);
+
+#if CONFIG_REF_MV
+  scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, 0, ref_mv_stack,
+                ref_mv_count);
+  above_count = *ref_mv_count;
+
+  scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, 0, -1, ref_mv_stack,
+                ref_mv_count);
+  left_count = *ref_mv_count - above_count;
+
+  if (above_count > 1 && left_count > 0) {
+    tmp_mv = ref_mv_stack[1];
+    ref_mv_stack[1] = ref_mv_stack[above_count];
+    ref_mv_stack[above_count] = tmp_mv;
+  }
+
+  for (idx = 0; idx < *ref_mv_count; ++idx)
+    clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3,
+                 xd);
+
+  for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *ref_mv_count); ++idx)
+    mv_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
+#endif
+
+  near_mv->as_int = 0;
+  switch (block) {
+    case 0:
+      nearest_mv->as_int = mv_list[0].as_int;
+      near_mv->as_int = mv_list[1].as_int;
+      break;
+    case 1:
+    case 2:
+      nearest_mv->as_int = bmi[0].as_mv[ref].as_int;
+      for (n = 0; n < MAX_MV_REF_CANDIDATES; ++n)
+        if (nearest_mv->as_int != mv_list[n].as_int) {
+          near_mv->as_int = mv_list[n].as_int;
+          break;
+        }
+      break;
+    case 3: {
+      int_mv candidates[2 + MAX_MV_REF_CANDIDATES];
+      candidates[0] = bmi[1].as_mv[ref];
+      candidates[1] = bmi[0].as_mv[ref];
+      candidates[2] = mv_list[0];
+      candidates[3] = mv_list[1];
+
+      nearest_mv->as_int = bmi[2].as_mv[ref].as_int;
+      for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n)
+        if (nearest_mv->as_int != candidates[n].as_int) {
+          near_mv->as_int = candidates[n].as_int;
+          break;
+        }
+      break;
+    }
+    default: assert(0 && "Invalid block index.");
+  }
+}
diff --git a/av1/common/mvref_common.h b/av1/common/mvref_common.h
new file mode 100644
index 0000000..babd4f0
--- /dev/null
+++ b/av1/common/mvref_common.h
@@ -0,0 +1,491 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP10_COMMON_MVREF_COMMON_H_
+#define VP10_COMMON_MVREF_COMMON_H_
+
+#include "av1/common/onyxc_int.h"
+#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MVREF_NEIGHBOURS 8
+
+typedef struct position {
+  int row;
+  int col;
+} POSITION;
+
+typedef enum {
+  BOTH_ZERO = 0,
+  ZERO_PLUS_PREDICTED = 1,
+  BOTH_PREDICTED = 2,
+  NEW_PLUS_NON_INTRA = 3,
+  BOTH_NEW = 4,
+  INTRA_PLUS_NON_INTRA = 5,
+  BOTH_INTRA = 6,
+  INVALID_CASE = 9
+} motion_vector_context;
+
+// This is used to figure out a context for the ref blocks. The code flattens
+// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
+// adding 9 for each intra block, 3 for each zero mv and 1 for each new
+// motion vector. This single number is then converted into a context
+// with a single lookup (counter_to_context).
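+// Worked example (illustrative): one intra neighbour (adds 9) plus one NEWMV
+// neighbour (adds 1) gives a counter of 10, which counter_to_context maps to
+// INTRA_PLUS_NON_INTRA.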
+static const int mode_2_counter[MB_MODE_COUNT] = {
+  9,  // DC_PRED
+  9,  // V_PRED
+  9,  // H_PRED
+  9,  // D45_PRED
+  9,  // D135_PRED
+  9,  // D117_PRED
+  9,  // D153_PRED
+  9,  // D207_PRED
+  9,  // D63_PRED
+  9,  // TM_PRED
+  0,  // NEARESTMV
+  0,  // NEARMV
+  3,  // ZEROMV
+  1,  // NEWMV
+#if CONFIG_EXT_INTER
+  1,    // NEWFROMNEARMV
+  0,    // NEAREST_NEARESTMV
+  0,    // NEAREST_NEARMV
+  0,    // NEAR_NEARESTMV
+  0,    // NEAR_NEARMV
+  1,    // NEAREST_NEWMV
+  1,    // NEW_NEARESTMV
+  1,    // NEAR_NEWMV
+  1,    // NEW_NEARMV
+  3,    // ZERO_ZEROMV
+  1,    // NEW_NEWMV
+#endif  // CONFIG_EXT_INTER
+};
+
+// There are 3^3 different combinations of 3 counts that can each be 0, 1 or
+// 2. However, the actual count can never be greater than 2, so the highest
+// counter we need is 18. 9 is an invalid counter that's never used.
+static const int counter_to_context[19] = {
+  BOTH_PREDICTED,        // 0
+  NEW_PLUS_NON_INTRA,    // 1
+  BOTH_NEW,              // 2
+  ZERO_PLUS_PREDICTED,   // 3
+  NEW_PLUS_NON_INTRA,    // 4
+  INVALID_CASE,          // 5
+  BOTH_ZERO,             // 6
+  INVALID_CASE,          // 7
+  INVALID_CASE,          // 8
+  INTRA_PLUS_NON_INTRA,  // 9
+  INTRA_PLUS_NON_INTRA,  // 10
+  INVALID_CASE,          // 11
+  INTRA_PLUS_NON_INTRA,  // 12
+  INVALID_CASE,          // 13
+  INVALID_CASE,          // 14
+  INVALID_CASE,          // 15
+  INVALID_CASE,          // 16
+  INVALID_CASE,          // 17
+  BOTH_INTRA             // 18
+};
+
+static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
+  // 4X4
+  { { -1, 0 },
+    { 0, -1 },
+    { -1, -1 },
+    { -2, 0 },
+    { 0, -2 },
+    { -2, -1 },
+    { -1, -2 },
+    { -2, -2 } },
+  // 4X8
+  { { -1, 0 },
+    { 0, -1 },
+    { -1, -1 },
+    { -2, 0 },
+    { 0, -2 },
+    { -2, -1 },
+    { -1, -2 },
+    { -2, -2 } },
+  // 8X4
+  { { -1, 0 },
+    { 0, -1 },
+    { -1, -1 },
+    { -2, 0 },
+    { 0, -2 },
+    { -2, -1 },
+    { -1, -2 },
+    { -2, -2 } },
+  // 8X8
+  { { -1, 0 },
+    { 0, -1 },
+    { -1, -1 },
+    { -2, 0 },
+    { 0, -2 },
+    { -2, -1 },
+    { -1, -2 },
+    { -2, -2 } },
+  // 8X16
+  { { 0, -1 },
+    { -1, 0 },
+    { 1, -1 },
+    { -1, -1 },
+    { 0, -2 },
+    { -2, 0 },
+    { -2, -1 },
+    { -1, -2 } },
+  // 16X8
+  { { -1, 0 },
+    { 0, -1 },
+    { -1, 1 },
+    { -1, -1 },
+    { -2, 0 },
+    { 0, -2 },
+    { -1, -2 },
+    { -2, -1 } },
+  // 16X16
+  { { -1, 0 },
+    { 0, -1 },
+    { -1, 1 },
+    { 1, -1 },
+    { -1, -1 },
+    { -3, 0 },
+    { 0, -3 },
+    { -3, -3 } },
+  // 16X32
+  { { 0, -1 },
+    { -1, 0 },
+    { 2, -1 },
+    { -1, -1 },
+    { -1, 1 },
+    { 0, -3 },
+    { -3, 0 },
+    { -3, -3 } },
+  // 32X16
+  { { -1, 0 },
+    { 0, -1 },
+    { -1, 2 },
+    { -1, -1 },
+    { 1, -1 },
+    { -3, 0 },
+    { 0, -3 },
+    { -3, -3 } },
+  // 32X32
+  { { -1, 1 },
+    { 1, -1 },
+    { -1, 2 },
+    { 2, -1 },
+    { -1, -1 },
+    { -3, 0 },
+    { 0, -3 },
+    { -3, -3 } },
+  // 32X64
+  { { 0, -1 },
+    { -1, 0 },
+    { 4, -1 },
+    { -1, 2 },
+    { -1, -1 },
+    { 0, -3 },
+    { -3, 0 },
+    { 2, -1 } },
+  // 64X32
+  { { -1, 0 },
+    { 0, -1 },
+    { -1, 4 },
+    { 2, -1 },
+    { -1, -1 },
+    { -3, 0 },
+    { 0, -3 },
+    { -1, 2 } },
+  // 64X64
+  { { -1, 3 },
+    { 3, -1 },
+    { -1, 4 },
+    { 4, -1 },
+    { -1, -1 },
+    { -1, 0 },
+    { 0, -1 },
+    { -1, 6 } },
+#if CONFIG_EXT_PARTITION
+  // TODO(debargha/jingning) Making them twice the 32x64, .. ones above
+  // 64x128
+  { { 0, -2 },
+    { -2, 0 },
+    { 8, -2 },
+    { -2, 4 },
+    { -2, -2 },
+    { 0, -6 },
+    { -6, 0 },
+    { 4, -2 } },
+  // 128x64
+  { { -2, 0 },
+    { 0, -2 },
+    { -2, 8 },
+    { 4, -2 },
+    { -2, -2 },
+    { -6, 0 },
+    { 0, -6 },
+    { -2, 4 } },
+  // 128x128
+  { { -2, 6 },
+    { 6, -2 },
+    { -2, 8 },
+    { 8, -2 },
+    { -2, -2 },
+    { -2, 0 },
+    { 0, -2 },
+    { -2, 12 } },
+#endif  // CONFIG_EXT_PARTITION
+};
+
+static const int idx_n_column_to_subblock[4][2] = {
+  { 1, 2 }, { 1, 3 }, { 3, 2 }, { 3, 3 }
+};
+
+// clamp_mv_ref
+#if CONFIG_EXT_PARTITION
+#define MV_BORDER (16 << 3)  // Allow 16 pels in 1/8th pel units
+#else
+#define MV_BORDER (8 << 3)  // Allow 8 pels in 1/8th pel units
+#endif                      // CONFIG_EXT_PARTITION
+
+static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
+  clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER,
+           xd->mb_to_right_edge + bw * 8 + MV_BORDER,
+           xd->mb_to_top_edge - bh * 8 - MV_BORDER,
+           xd->mb_to_bottom_edge + bh * 8 + MV_BORDER);
+}
+
+// This function returns either the appropriate sub-block mv or the whole
+// block's mv, depending on whether block_size < 8x8 and a sub-block index
+// (block_idx >= 0) is given.
+static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
+                                      int search_col, int block_idx) {
+  return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
+             ? candidate
+                   ->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
+                   .as_mv[which_mv]
+             : candidate->mbmi.mv[which_mv];
+}
+
+#if CONFIG_REF_MV
+static INLINE int_mv get_sub_block_pred_mv(const MODE_INFO *candidate,
+                                           int which_mv, int search_col,
+                                           int block_idx) {
+  return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
+             ? candidate
+                   ->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
+                   .pred_mv_s8[which_mv]
+             : candidate->mbmi.pred_mv[which_mv];
+}
+#endif
+
+// Performs mv sign inversion if indicated by the reference frame combination.
+static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
+                              const MV_REFERENCE_FRAME this_ref_frame,
+                              const int *ref_sign_bias) {
+  int_mv mv = mbmi->mv[ref];
+  if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
+    mv.as_mv.row *= -1;
+    mv.as_mv.col *= -1;
+  }
+  return mv;
+}
+
+#define CLIP_IN_ADD(mv, bw, bh, xd) clamp_mv_ref(mv, bw, bh, xd)
+
+// This macro is used to add a motion vector to the mv_ref list if it isn't
+// already in the list.  If it is the second motion vector added, it also
+// skips all additional processing and jumps to Done!
+#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done)      \
+  do {                                                                       \
+    (mv_ref_list)[(refmv_count)] = (mv);                                     \
+    CLIP_IN_ADD(&(mv_ref_list)[(refmv_count)].as_mv, (bw), (bh), (xd));      \
+    if (refmv_count && (mv_ref_list)[1].as_int != (mv_ref_list)[0].as_int) { \
+      (refmv_count) = 2;                                                     \
+      goto Done;                                                             \
+    }                                                                        \
+    (refmv_count) = 1;                                                       \
+  } while (0)
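+
+// Typical use, as in find_mv_refs_idx() in mvref_common.c:
+//   ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, bw, bh, xd,
+//                   Done);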
+
+// If either reference frame is different from the one being searched for,
+// not INTRA, and the two differ from each other, scale and add the mv to
+// our list.
+#define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \
+                                 mv_ref_list, bw, bh, xd, Done)               \
+  do {                                                                        \
+    if (is_inter_block(mbmi)) {                                               \
+      if ((mbmi)->ref_frame[0] != ref_frame)                                  \
+        ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias),        \
+                        refmv_count, mv_ref_list, bw, bh, xd, Done);          \
+      if (has_second_ref(mbmi) && (mbmi)->ref_frame[1] != ref_frame)          \
+        ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias),        \
+                        refmv_count, mv_ref_list, bw, bh, xd, Done);          \
+    }                                                                         \
+  } while (0)
+
+// Checks that the given mi_row, mi_col and search point
+// are inside the borders of the tile.
+static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row,
+                            const POSITION *mi_pos) {
+  return !(mi_row + mi_pos->row < tile->mi_row_start ||
+           mi_col + mi_pos->col < tile->mi_col_start ||
+           mi_row + mi_pos->row >= tile->mi_row_end ||
+           mi_col + mi_pos->col >= tile->mi_col_end);
+}
+
+static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
+  const int use_hp = allow_hp && vp10_use_mv_hp(mv);
+  if (!use_hp) {
+    if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1);
+    if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1);
+  }
+}
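+
+// Example: with allow_hp == 0 (or an mv too large for high precision), odd
+// components are rounded toward zero, so (5, -3) becomes (4, -2).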
+
+#if CONFIG_REF_MV
+static INLINE int vp10_nmv_ctx(const uint8_t ref_mv_count,
+                               const CANDIDATE_MV *ref_mv_stack) {
+#if CONFIG_EXT_INTER
+  return 0;
+#endif
+  if (ref_mv_stack[0].weight > REF_CAT_LEVEL && ref_mv_count > 0) {
+    if (abs(ref_mv_stack[0].this_mv.as_mv.row -
+            ref_mv_stack[0].pred_mv.as_mv.row) <= 4 &&
+        abs(ref_mv_stack[0].this_mv.as_mv.col -
+            ref_mv_stack[0].pred_mv.as_mv.col) <= 4)
+      return 2;
+    else
+      return 1;
+  }
+  return 0;
+}
+
+static INLINE int8_t vp10_ref_frame_type(const MV_REFERENCE_FRAME *const rf) {
+  if (rf[1] > INTRA_FRAME) {
+    return TOTAL_REFS_PER_FRAME + FWD_RF_OFFSET(rf[0]) +
+           BWD_RF_OFFSET(rf[1]) * FWD_REFS;
+  }
+
+  return rf[0];
+}
+
+static MV_REFERENCE_FRAME ref_frame_map[COMP_REFS][2] = {
+#if CONFIG_EXT_REFS
+  { LAST_FRAME, BWDREF_FRAME },  { LAST2_FRAME, BWDREF_FRAME },
+  { LAST3_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, BWDREF_FRAME },
+
+  { LAST_FRAME, ALTREF_FRAME },  { LAST2_FRAME, ALTREF_FRAME },
+  { LAST3_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME }
+#else
+  { LAST_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME }
+#endif
+};
+
+static INLINE void vp10_set_ref_frame(MV_REFERENCE_FRAME *rf,
+                                      int8_t ref_frame_type) {
+  if (ref_frame_type >= TOTAL_REFS_PER_FRAME) {
+    rf[0] = ref_frame_map[ref_frame_type - TOTAL_REFS_PER_FRAME][0];
+    rf[1] = ref_frame_map[ref_frame_type - TOTAL_REFS_PER_FRAME][1];
+  } else {
+    rf[0] = ref_frame_type;
+    rf[1] = NONE;
+    assert(ref_frame_type > INTRA_FRAME &&
+           ref_frame_type < TOTAL_REFS_PER_FRAME);
+  }
+}
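+
+// Note that vp10_ref_frame_type() and vp10_set_ref_frame() are inverses:
+// compound pairs are encoded as TOTAL_REFS_PER_FRAME plus an index into
+// ref_frame_map, so vp10_set_ref_frame() applied to the result of
+// vp10_ref_frame_type() recovers the original pair.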
+
+static INLINE int16_t vp10_mode_context_analyzer(
+    const int16_t *const mode_context, const MV_REFERENCE_FRAME *const rf,
+    BLOCK_SIZE bsize, int block) {
+  int16_t mode_ctx = 0;
+  if (block >= 0) {
+    mode_ctx = mode_context[rf[0]] & 0x00ff;
+
+    if (block > 0 && bsize < BLOCK_8X8 && bsize > BLOCK_4X4)
+      mode_ctx |= (1 << SKIP_NEARESTMV_SUB8X8_OFFSET);
+
+    return mode_ctx;
+  }
+
+  if (rf[1] > INTRA_FRAME)
+    return mode_context[rf[0]] & (mode_context[rf[1]] | 0x00ff);
+  else if (rf[0] != ALTREF_FRAME)
+    return mode_context[rf[0]] & ~(mode_context[ALTREF_FRAME] & 0xfe00);
+  else
+    return mode_context[rf[0]];
+}
+
+static INLINE uint8_t vp10_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
+                                   int ref_idx) {
+  if (ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL &&
+      ref_mv_stack[ref_idx + 1].weight >= REF_CAT_LEVEL) {
+    if (ref_mv_stack[ref_idx].weight == ref_mv_stack[ref_idx + 1].weight)
+      return 0;
+    else
+      return 1;
+  }
+
+  if (ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL &&
+      ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
+    return 2;
+
+  if (ref_mv_stack[ref_idx].weight < REF_CAT_LEVEL &&
+      ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL) {
+    if (ref_mv_stack[ref_idx].weight == ref_mv_stack[ref_idx + 1].weight)
+      return 3;
+    else
+      return 4;
+  }
+
+  return 0;
+}
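+
+// In summary, with w0 = ref_mv_stack[ref_idx].weight and
+// w1 = ref_mv_stack[ref_idx + 1].weight:
+//   w0, w1 >= REF_CAT_LEVEL  -> 0 if w0 == w1, else 1
+//   w0 >= REF_CAT_LEVEL > w1 -> 2
+//   w0, w1 <  REF_CAT_LEVEL  -> 3 if w0 == w1, else 4
+// Any remaining case falls back to 0.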
+#endif
+
+typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
+void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+                       MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+#if CONFIG_REF_MV
+                       uint8_t *ref_mv_count, CANDIDATE_MV *ref_mv_stack,
+#if CONFIG_EXT_INTER
+                       int16_t *compound_mode_context,
+#endif  // CONFIG_EXT_INTER
+#endif
+                       int_mv *mv_ref_list, int mi_row, int mi_col,
+                       find_mv_refs_sync sync, void *const data,
+                       int16_t *mode_context);
+
+// Check a list of motion vectors by SAD score, using a number of rows of
+// pixels above and a number of columns of pixels to the left, to select the
+// one with the best score to use as the reference motion vector.
+void vp10_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
+                            int_mv *near_mv);
+
+void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, int block,
+                                    int ref, int mi_row, int mi_col,
+#if CONFIG_REF_MV
+                                    CANDIDATE_MV *ref_mv_stack,
+                                    uint8_t *ref_mv_count,
+#endif
+#if CONFIG_EXT_INTER
+                                    int_mv *mv_list,
+#endif  // CONFIG_EXT_INTER
+                                    int_mv *nearest_mv, int_mv *near_mv);
+
+#if CONFIG_EXT_INTER
+// This function keeps a mode count for a given MB/SB
+void vp10_update_mv_context(const MACROBLOCKD *xd, MODE_INFO *mi,
+                            MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list,
+                            int block, int mi_row, int mi_col,
+                            int16_t *mode_context);
+#endif  // CONFIG_EXT_INTER
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_MVREF_COMMON_H_
diff --git a/av1/common/od_dering.c b/av1/common/od_dering.c
new file mode 100644
index 0000000..b7a459c
--- /dev/null
+++ b/av1/common/od_dering.c
@@ -0,0 +1,362 @@
+/*Daala video codec
+Copyright (c) 2014-2016 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <math.h>
+#include "dering.h"
+
+const od_dering_opt_vtbl OD_DERING_VTBL_C = {
+  { od_filter_dering_direction_4x4_c, od_filter_dering_direction_8x8_c },
+  { od_filter_dering_orthogonal_4x4_c, od_filter_dering_orthogonal_8x8_c }
+};
+
+/* Generated from gen_filter_tables.c. */
+const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
+  { -1 * OD_FILT_BSTRIDE + 1, -2 * OD_FILT_BSTRIDE + 2,
+    -3 * OD_FILT_BSTRIDE + 3 },
+  { 0 * OD_FILT_BSTRIDE + 1, -1 * OD_FILT_BSTRIDE + 2,
+    -1 * OD_FILT_BSTRIDE + 3 },
+  { 0 * OD_FILT_BSTRIDE + 1, 0 * OD_FILT_BSTRIDE + 2, 0 * OD_FILT_BSTRIDE + 3 },
+  { 0 * OD_FILT_BSTRIDE + 1, 1 * OD_FILT_BSTRIDE + 2, 1 * OD_FILT_BSTRIDE + 3 },
+  { 1 * OD_FILT_BSTRIDE + 1, 2 * OD_FILT_BSTRIDE + 2, 3 * OD_FILT_BSTRIDE + 3 },
+  { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 1, 3 * OD_FILT_BSTRIDE + 1 },
+  { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 0, 3 * OD_FILT_BSTRIDE + 0 },
+  { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE - 1, 3 * OD_FILT_BSTRIDE - 1 },
+};
+
+const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS] = { 0, 0.5,  0.707,
+                                                        1, 1.41, 2 };
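+
+/* The nonzero gains are successive powers of sqrt(2): 2^-1, 2^-0.5, 2^0,
+   2^0.5 and 2^1. */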
+
+/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
+   The search minimizes the weighted variance along all the lines in a
+   particular direction, i.e. the squared error between the input and a
+   "predicted" block where each pixel is replaced by the average along a line
+   in a particular direction. Since each direction has the same sum(x^2) term,
+   that term is never computed. See Section 2, step 2, of:
+   http://jmvalin.ca/notes/intra_paint.pdf */
+static int od_dir_find8(const od_dering_in *img, int stride, int32_t *var,
+                        int coeff_shift) {
+  int i;
+  int32_t cost[8] = { 0 };
+  int partial[8][15] = { { 0 } };
+  int32_t best_cost = 0;
+  int best_dir = 0;
+  /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
+     The output is then 840 times larger, but that doesn't matter since we
+     only use it to find the max. */
+  static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
+  for (i = 0; i < 8; i++) {
+    int j;
+    for (j = 0; j < 8; j++) {
+      int x;
+      /* We subtract 128 here to reduce the maximum range of the squared
+         partial sums. */
+      x = (img[i * stride + j] >> coeff_shift) - 128;
+      partial[0][i + j] += x;
+      partial[1][i + j / 2] += x;
+      partial[2][i] += x;
+      partial[3][3 + i - j / 2] += x;
+      partial[4][7 + i - j] += x;
+      partial[5][3 - i / 2 + j] += x;
+      partial[6][j] += x;
+      partial[7][i / 2 + j] += x;
+    }
+  }
+  for (i = 0; i < 8; i++) {
+    cost[2] += partial[2][i] * partial[2][i];
+    cost[6] += partial[6][i] * partial[6][i];
+  }
+  cost[2] *= div_table[8];
+  cost[6] *= div_table[8];
+  for (i = 0; i < 7; i++) {
+    cost[0] += (partial[0][i] * partial[0][i] +
+                partial[0][14 - i] * partial[0][14 - i]) *
+               div_table[i + 1];
+    cost[4] += (partial[4][i] * partial[4][i] +
+                partial[4][14 - i] * partial[4][14 - i]) *
+               div_table[i + 1];
+  }
+  cost[0] += partial[0][7] * partial[0][7] * div_table[8];
+  cost[4] += partial[4][7] * partial[4][7] * div_table[8];
+  for (i = 1; i < 8; i += 2) {
+    int j;
+    for (j = 0; j < 4 + 1; j++) {
+      cost[i] += partial[i][3 + j] * partial[i][3 + j];
+    }
+    cost[i] *= div_table[8];
+    for (j = 0; j < 4 - 1; j++) {
+      cost[i] += (partial[i][j] * partial[i][j] +
+                  partial[i][10 - j] * partial[i][10 - j]) *
+                 div_table[2 * j + 2];
+    }
+  }
+  for (i = 0; i < 8; i++) {
+    if (cost[i] > best_cost) {
+      best_cost = cost[i];
+      best_dir = i;
+    }
+  }
+  /* Difference between the optimal variance and the variance along the
+     orthogonal direction. Again, the sum(x^2) terms cancel out. */
+  *var = best_cost - cost[(best_dir + 4) & 7];
+  /* We'd normally divide by 840, but dividing by 1024 is close enough
+     for what we're going to do with this. */
+  *var >>= 10;
+  return best_dir;
+}
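+
+/* For illustration: in a block that is constant along each row (a purely
+   horizontal pattern), the energy concentrates in the row sums partial[2][i],
+   so cost[2] dominates and direction 2 is returned with a large *var. For a
+   flat block the div_table normalization makes every cost equal, and *var
+   is 0. */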
+
+#define OD_DERING_VERY_LARGE (30000)
+#define OD_DERING_INBUF_SIZE \
+  ((OD_BSIZE_MAX + 2 * OD_FILT_BORDER) * (OD_BSIZE_MAX + 2 * OD_FILT_BORDER))
+
+/* Smooth in the direction detected. */
+void od_filter_dering_direction_c(int16_t *y, int ystride, const int16_t *in,
+                                  int ln, int threshold, int dir) {
+  int i;
+  int j;
+  int k;
+  static const int taps[3] = { 3, 2, 2 };
+  for (i = 0; i < 1 << ln; i++) {
+    for (j = 0; j < 1 << ln; j++) {
+      int16_t sum;
+      int16_t xx;
+      int16_t yy;
+      xx = in[i * OD_FILT_BSTRIDE + j];
+      sum = 0;
+      for (k = 0; k < 3; k++) {
+        int16_t p0;
+        int16_t p1;
+        p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
+             xx;
+        p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
+             xx;
+        if (abs(p0) < threshold) sum += taps[k] * p0;
+        if (abs(p1) < threshold) sum += taps[k] * p1;
+      }
+      yy = xx + ((sum + 8) >> 4);
+      y[i * ystride + j] = yy;
+    }
+  }
+}
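+
+/* The taps {3, 2, 2} weight the first, second and third neighbor on each
+   side along the direction, so at most 2*(3 + 2 + 2) = 14/16 of the
+   neighbor-center differences is folded back in ((sum + 8) >> 4 rounds).
+   Differences whose magnitude reaches the threshold are excluded, which
+   preserves strong edges. */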
+
+void od_filter_dering_direction_4x4_c(int16_t *y, int ystride,
+                                      const int16_t *in, int threshold,
+                                      int dir) {
+  od_filter_dering_direction_c(y, ystride, in, 2, threshold, dir);
+}
+
+void od_filter_dering_direction_8x8_c(int16_t *y, int ystride,
+                                      const int16_t *in, int threshold,
+                                      int dir) {
+  od_filter_dering_direction_c(y, ystride, in, 3, threshold, dir);
+}
+
+/* Smooth in the direction orthogonal to what was detected. */
+void od_filter_dering_orthogonal_c(int16_t *y, int ystride, const int16_t *in,
+                                   const od_dering_in *x, int xstride, int ln,
+                                   int threshold, int dir) {
+  int i;
+  int j;
+  int offset;
+  if (dir > 0 && dir < 4)
+    offset = OD_FILT_BSTRIDE;
+  else
+    offset = 1;
+  for (i = 0; i < 1 << ln; i++) {
+    for (j = 0; j < 1 << ln; j++) {
+      int16_t athresh;
+      int16_t yy;
+      int16_t sum;
+      int16_t p;
+      /* Deringing orthogonal to the direction uses a tighter threshold
+         because we want to be conservative. We've presumably already
+         achieved some deringing, so the amount of change is expected
+         to be low. Also, since we might be filtering across an edge, we
+         want to make sure not to blur it. That being said, we might want
+         to be a little bit more aggressive on pure horizontal/vertical
+         since the ringing there tends to be directional, so it doesn't
+         get removed by the directional filtering. */
+      athresh = OD_MINI(
+          threshold, threshold / 3 +
+                         abs(in[i * OD_FILT_BSTRIDE + j] - x[i * xstride + j]));
+      yy = in[i * OD_FILT_BSTRIDE + j];
+      sum = 0;
+      p = in[i * OD_FILT_BSTRIDE + j + offset] - yy;
+      if (abs(p) < athresh) sum += p;
+      p = in[i * OD_FILT_BSTRIDE + j - offset] - yy;
+      if (abs(p) < athresh) sum += p;
+      p = in[i * OD_FILT_BSTRIDE + j + 2 * offset] - yy;
+      if (abs(p) < athresh) sum += p;
+      p = in[i * OD_FILT_BSTRIDE + j - 2 * offset] - yy;
+      if (abs(p) < athresh) sum += p;
+      y[i * ystride + j] = yy + ((3 * sum + 8) >> 4);
+    }
+  }
+}
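+
+/* All four taps here (two on each side, orthogonal to the detected
+   direction) have weight 3, so at most 12/16 of the clamped differences is
+   added back; the adaptive athresh above ties the correction to how much
+   the directional pass already changed each pixel. */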
+
+void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
+                                       const int16_t *in, const od_dering_in *x,
+                                       int xstride, int threshold, int dir) {
+  od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 2, threshold, dir);
+}
+
+void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
+                                       const int16_t *in, const od_dering_in *x,
+                                       int xstride, int threshold, int dir) {
+  od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 3, threshold, dir);
+}
+
+/* This table approximates x^0.16 with the index being log2(x). It is clamped
+   to [.5, 3]. The table is computed as:
+   round(256*min(3, max(.5, 1.08*(sqrt(2)*2.^([0:17]+8)/256/256).^.16))) */
+static const int16_t OD_THRESH_TABLE_Q8[18] = {
+  128, 134, 150, 168, 188, 210, 234, 262, 292,
+  327, 365, 408, 455, 509, 569, 635, 710, 768,
+};
+
+/* Compute deringing filter threshold for each 8x8 block based on the
+   directional variance difference. A high variance difference means that we
+   have a highly directional pattern (e.g. a high contrast edge), so we can
+   apply more deringing. A low variance difference means that we either have
+   a low contrast edge or a non-directional texture, so we want to be careful
+   not to blur. */
+static void od_compute_thresh(int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
+                              int threshold,
+                              int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
+                              int nhb, int nvb) {
+  int bx;
+  int by;
+  for (by = 0; by < nvb; by++) {
+    for (bx = 0; bx < nhb; bx++) {
+      int v1;
+      /* We use the variance of 8x8 blocks to adjust the threshold. */
+      v1 = OD_MINI(32767, var[by][bx] >> 6);
+      thresh[by][bx] = (threshold * OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
+    }
+  }
+}
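+
+/* For illustration: var[by][bx] = 6400 gives v1 = 100, OD_ILOG(100) = 7 and
+   OD_THRESH_TABLE_Q8[7] = 262, so the per-block threshold becomes
+   (threshold * 262 + 128) >> 8, i.e. roughly 1.02 * threshold. */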
+
+void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
+               const od_dering_in *x, int xstride, int nhb, int nvb, int sbx,
+               int sby, int nhsb, int nvsb, int xdec,
+               int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
+               unsigned char *bskip, int skip_stride, int threshold,
+               int overlap, int coeff_shift) {
+  int i;
+  int j;
+  int bx;
+  int by;
+  int16_t inbuf[OD_DERING_INBUF_SIZE];
+  int16_t *in;
+  int bsize;
+  int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
+  int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
+  bsize = 3 - xdec;
+  in = inbuf + OD_FILT_BORDER * OD_FILT_BSTRIDE + OD_FILT_BORDER;
+  /* We avoid filtering the pixels for which some of the pixels to average
+     are outside the frame. We could change the filter instead, but it would
+     add special cases for any future vectorization. */
+  for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
+  for (i = -OD_FILT_BORDER * (sby != 0);
+       i < (nvb << bsize) + OD_FILT_BORDER * (sby != nvsb - 1); i++) {
+    for (j = -OD_FILT_BORDER * (sbx != 0);
+         j < (nhb << bsize) + OD_FILT_BORDER * (sbx != nhsb - 1); j++) {
+      in[i * OD_FILT_BSTRIDE + j] = x[i * xstride + j];
+    }
+  }
+  if (pli == 0) {
+    for (by = 0; by < nvb; by++) {
+      for (bx = 0; bx < nhb; bx++) {
+        dir[by][bx] = od_dir_find8(&x[8 * by * xstride + 8 * bx], xstride,
+                                   &var[by][bx], coeff_shift);
+      }
+    }
+    od_compute_thresh(thresh, threshold, var, nhb, nvb);
+  } else {
+    for (by = 0; by < nvb; by++) {
+      for (bx = 0; bx < nhb; bx++) {
+        thresh[by][bx] = threshold;
+      }
+    }
+  }
+  for (by = 0; by < nvb; by++) {
+    for (bx = 0; bx < nhb; bx++) {
+      int skip;
+#if defined(DAALA_ODINTRIN)
+      int xstart;
+      int ystart;
+      int xend;
+      int yend;
+      xstart = ystart = 0;
+      xend = yend = (2 >> xdec);
+      if (overlap) {
+        xstart -= (sbx != 0);
+        ystart -= (sby != 0);
+        xend += (sbx != nhsb - 1);
+        yend += (sby != nvsb - 1);
+      }
+      skip = 1;
+      /* We look at whether the current block and the 4x4 blocks surrounding
+         it (due to lapping) are skipped, to avoid filtering the same content
+         multiple times. */
+      for (i = ystart; i < yend; i++) {
+        for (j = xstart; j < xend; j++) {
+          skip = skip && bskip[((by << 1 >> xdec) + i) * skip_stride +
+                               (bx << 1 >> xdec) + j];
+        }
+      }
+#else
+      (void)overlap;
+      skip = bskip[by * skip_stride + bx];
+#endif
+      if (skip) thresh[by][bx] = 0;
+    }
+  }
+  for (by = 0; by < nvb; by++) {
+    for (bx = 0; bx < nhb; bx++) {
+      (vtbl->filter_dering_direction[bsize - OD_LOG_BSIZE0])(
+          &y[(by * ystride << bsize) + (bx << bsize)], ystride,
+          &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], thresh[by][bx],
+          dir[by][bx]);
+    }
+  }
+  for (i = 0; i < nvb << bsize; i++) {
+    for (j = 0; j < nhb << bsize; j++) {
+      in[i * OD_FILT_BSTRIDE + j] = y[i * ystride + j];
+    }
+  }
+  for (by = 0; by < nvb; by++) {
+    for (bx = 0; bx < nhb; bx++) {
+      (vtbl->filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
+          &y[(by * ystride << bsize) + (bx << bsize)], ystride,
+          &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
+          &x[(by * xstride << bsize) + (bx << bsize)], xstride, thresh[by][bx],
+          dir[by][bx]);
+    }
+  }
+}
diff --git a/av1/common/od_dering.h b/av1/common/od_dering.h
new file mode 100644
index 0000000..6bb3974
--- /dev/null
+++ b/av1/common/od_dering.h
@@ -0,0 +1,92 @@
+/*Daala video codec
+Copyright (c) 2003-2010 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#if !defined(_dering_H)
+#define _dering_H (1)
+
+#include "odintrin.h"
+
+#if defined(DAALA_ODINTRIN)
+#include "filter.h"
+typedef int16_t od_dering_in;
+#endif
+
+#define OD_DERINGSIZES (2)
+
+#define OD_DERING_NO_CHECK_OVERLAP (0)
+#define OD_DERING_CHECK_OVERLAP (1)
+
+#define OD_DERING_LEVELS (6)
+extern const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS];
+
+#define OD_DERING_NBLOCKS (OD_BSIZE_MAX / 8)
+
+#define OD_FILT_BORDER (3)
+#define OD_FILT_BSTRIDE (OD_BSIZE_MAX + 2 * OD_FILT_BORDER)
+
+extern const int OD_DIRECTION_OFFSETS_TABLE[8][3];
+
+typedef void (*od_filter_dering_direction_func)(int16_t *y, int ystride,
+                                                const int16_t *in,
+                                                int threshold, int dir);
+typedef void (*od_filter_dering_orthogonal_func)(int16_t *y, int ystride,
+                                                 const int16_t *in,
+                                                 const od_dering_in *x,
+                                                 int xstride, int threshold,
+                                                 int dir);
+
+struct od_dering_opt_vtbl {
+  od_filter_dering_direction_func filter_dering_direction[OD_DERINGSIZES];
+  od_filter_dering_orthogonal_func filter_dering_orthogonal[OD_DERINGSIZES];
+};
+typedef struct od_dering_opt_vtbl od_dering_opt_vtbl;
+
+void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
+               const od_dering_in *x, int xstride, int nhb, int nvb, int sbx,
+               int sby, int nhsb, int nvsb, int xdec,
+               int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
+               unsigned char *bskip, int skip_stride, int threshold,
+               int overlap, int coeff_shift);
+void od_filter_dering_direction_c(int16_t *y, int ystride, const int16_t *in,
+                                  int ln, int threshold, int dir);
+void od_filter_dering_orthogonal_c(int16_t *y, int ystride, const int16_t *in,
+                                   const od_dering_in *x, int xstride, int ln,
+                                   int threshold, int dir);
+
+extern const od_dering_opt_vtbl OD_DERING_VTBL_C;
+
+void od_filter_dering_direction_4x4_c(int16_t *y, int ystride,
+                                      const int16_t *in, int threshold,
+                                      int dir);
+void od_filter_dering_direction_8x8_c(int16_t *y, int ystride,
+                                      const int16_t *in, int threshold,
+                                      int dir);
+void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
+                                       const int16_t *in, const od_dering_in *x,
+                                       int xstride, int threshold, int dir);
+void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
+                                       const int16_t *in, const od_dering_in *x,
+                                       int xstride, int threshold, int dir);
+
+#endif
diff --git a/av1/common/odintrin.c b/av1/common/odintrin.c
new file mode 100644
index 0000000..b5bbaa6
--- /dev/null
+++ b/av1/common/odintrin.c
@@ -0,0 +1,552 @@
+/*Daala video codec
+Copyright (c) 2006-2010 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#include "av1/common/odintrin.h"
+
+/*Constants for use with OD_DIVU_SMALL().
+  See \cite{Rob05} for details on computing these constants.
+  @INPROCEEDINGS{Rob05,
+    author="Arch D. Robison",
+    title="{N}-bit Unsigned Division via {N}-bit Multiply-Add",
+    booktitle="Proc. of the 17th IEEE Symposium on Computer Arithmetic
+     (ARITH'05)",
+    pages="131--139",
+    address="Cape Cod, MA",
+    month=Jun,
+    year=2005
+  }*/
+uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2] = {
+  { 0xFFFFFFFF, 0xFFFFFFFF }, { 0xFFFFFFFF, 0xFFFFFFFF },
+  { 0xAAAAAAAB, 0 },          { 0xFFFFFFFF, 0xFFFFFFFF },
+  { 0xCCCCCCCD, 0 },          { 0xAAAAAAAB, 0 },
+  { 0x92492492, 0x92492492 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+  { 0xE38E38E4, 0 },          { 0xCCCCCCCD, 0 },
+  { 0xBA2E8BA3, 0 },          { 0xAAAAAAAB, 0 },
+  { 0x9D89D89E, 0 },          { 0x92492492, 0x92492492 },
+  { 0x88888889, 0 },          { 0xFFFFFFFF, 0xFFFFFFFF },
+  { 0xF0F0F0F1, 0 },          { 0xE38E38E4, 0 },
+  { 0xD79435E5, 0xD79435E5 }, { 0xCCCCCCCD, 0 },
+  { 0xC30C30C3, 0xC30C30C3 }, { 0xBA2E8BA3, 0 },
+  { 0xB21642C9, 0 },          { 0xAAAAAAAB, 0 },
+  { 0xA3D70A3E, 0 },          { 0x9D89D89E, 0 },
+  { 0x97B425ED, 0x97B425ED }, { 0x92492492, 0x92492492 },
+  { 0x8D3DCB09, 0 },          { 0x88888889, 0 },
+  { 0x84210842, 0x84210842 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+  { 0xF83E0F84, 0 },          { 0xF0F0F0F1, 0 },
+  { 0xEA0EA0EA, 0xEA0EA0EA }, { 0xE38E38E4, 0 },
+  { 0xDD67C8A6, 0xDD67C8A6 }, { 0xD79435E5, 0xD79435E5 },
+  { 0xD20D20D2, 0xD20D20D2 }, { 0xCCCCCCCD, 0 },
+  { 0xC7CE0C7D, 0 },          { 0xC30C30C3, 0xC30C30C3 },
+  { 0xBE82FA0C, 0 },          { 0xBA2E8BA3, 0 },
+  { 0xB60B60B6, 0xB60B60B6 }, { 0xB21642C9, 0 },
+  { 0xAE4C415D, 0 },          { 0xAAAAAAAB, 0 },
+  { 0xA72F053A, 0 },          { 0xA3D70A3E, 0 },
+  { 0xA0A0A0A1, 0 },          { 0x9D89D89E, 0 },
+  { 0x9A90E7D9, 0x9A90E7D9 }, { 0x97B425ED, 0x97B425ED },
+  { 0x94F2094F, 0x94F2094F }, { 0x92492492, 0x92492492 },
+  { 0x8FB823EE, 0x8FB823EE }, { 0x8D3DCB09, 0 },
+  { 0x8AD8F2FC, 0 },          { 0x88888889, 0 },
+  { 0x864B8A7E, 0 },          { 0x84210842, 0x84210842 },
+  { 0x82082082, 0x82082082 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+  { 0xFC0FC0FD, 0 },          { 0xF83E0F84, 0 },
+  { 0xF4898D60, 0 },          { 0xF0F0F0F1, 0 },
+  { 0xED7303B6, 0 },          { 0xEA0EA0EA, 0xEA0EA0EA },
+  { 0xE6C2B449, 0 },          { 0xE38E38E4, 0 },
+  { 0xE070381C, 0xE070381C }, { 0xDD67C8A6, 0xDD67C8A6 },
+  { 0xDA740DA8, 0 },          { 0xD79435E5, 0xD79435E5 },
+  { 0xD4C77B04, 0 },          { 0xD20D20D2, 0xD20D20D2 },
+  { 0xCF6474A9, 0 },          { 0xCCCCCCCD, 0 },
+  { 0xCA4587E7, 0 },          { 0xC7CE0C7D, 0 },
+  { 0xC565C87C, 0 },          { 0xC30C30C3, 0xC30C30C3 },
+  { 0xC0C0C0C1, 0 },          { 0xBE82FA0C, 0 },
+  { 0xBC52640C, 0 },          { 0xBA2E8BA3, 0 },
+  { 0xB81702E1, 0 },          { 0xB60B60B6, 0xB60B60B6 },
+  { 0xB40B40B4, 0xB40B40B4 }, { 0xB21642C9, 0 },
+  { 0xB02C0B03, 0 },          { 0xAE4C415D, 0 },
+  { 0xAC769184, 0xAC769184 }, { 0xAAAAAAAB, 0 },
+  { 0xA8E83F57, 0xA8E83F57 }, { 0xA72F053A, 0 },
+  { 0xA57EB503, 0 },          { 0xA3D70A3E, 0 },
+  { 0xA237C32B, 0xA237C32B }, { 0xA0A0A0A1, 0 },
+  { 0x9F1165E7, 0x9F1165E7 }, { 0x9D89D89E, 0 },
+  { 0x9C09C09C, 0x9C09C09C }, { 0x9A90E7D9, 0x9A90E7D9 },
+  { 0x991F1A51, 0x991F1A51 }, { 0x97B425ED, 0x97B425ED },
+  { 0x964FDA6C, 0x964FDA6C }, { 0x94F2094F, 0x94F2094F },
+  { 0x939A85C4, 0x939A85C4 }, { 0x92492492, 0x92492492 },
+  { 0x90FDBC09, 0x90FDBC09 }, { 0x8FB823EE, 0x8FB823EE },
+  { 0x8E78356D, 0x8E78356D }, { 0x8D3DCB09, 0 },
+  { 0x8C08C08C, 0x8C08C08C }, { 0x8AD8F2FC, 0 },
+  { 0x89AE408A, 0 },          { 0x88888889, 0 },
+  { 0x8767AB5F, 0x8767AB5F }, { 0x864B8A7E, 0 },
+  { 0x85340853, 0x85340853 }, { 0x84210842, 0x84210842 },
+  { 0x83126E98, 0 },          { 0x82082082, 0x82082082 },
+  { 0x81020408, 0x81020408 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+  { 0xFE03F810, 0 },          { 0xFC0FC0FD, 0 },
+  { 0xFA232CF3, 0 },          { 0xF83E0F84, 0 },
+  { 0xF6603D99, 0 },          { 0xF4898D60, 0 },
+  { 0xF2B9D649, 0 },          { 0xF0F0F0F1, 0 },
+  { 0xEF2EB720, 0 },          { 0xED7303B6, 0 },
+  { 0xEBBDB2A6, 0 },          { 0xEA0EA0EA, 0xEA0EA0EA },
+  { 0xE865AC7C, 0 },          { 0xE6C2B449, 0 },
+  { 0xE525982B, 0 },          { 0xE38E38E4, 0 },
+  { 0xE1FC780F, 0 },          { 0xE070381C, 0xE070381C },
+  { 0xDEE95C4D, 0 },          { 0xDD67C8A6, 0xDD67C8A6 },
+  { 0xDBEB61EF, 0 },          { 0xDA740DA8, 0 },
+  { 0xD901B204, 0 },          { 0xD79435E5, 0xD79435E5 },
+  { 0xD62B80D7, 0 },          { 0xD4C77B04, 0 },
+  { 0xD3680D37, 0 },          { 0xD20D20D2, 0xD20D20D2 },
+  { 0xD0B69FCC, 0 },          { 0xCF6474A9, 0 },
+  { 0xCE168A77, 0xCE168A77 }, { 0xCCCCCCCD, 0 },
+  { 0xCB8727C1, 0 },          { 0xCA4587E7, 0 },
+  { 0xC907DA4F, 0 },          { 0xC7CE0C7D, 0 },
+  { 0xC6980C6A, 0 },          { 0xC565C87C, 0 },
+  { 0xC4372F86, 0 },          { 0xC30C30C3, 0xC30C30C3 },
+  { 0xC1E4BBD6, 0 },          { 0xC0C0C0C1, 0 },
+  { 0xBFA02FE8, 0xBFA02FE8 }, { 0xBE82FA0C, 0 },
+  { 0xBD691047, 0xBD691047 }, { 0xBC52640C, 0 },
+  { 0xBB3EE722, 0 },          { 0xBA2E8BA3, 0 },
+  { 0xB92143FA, 0xB92143FA }, { 0xB81702E1, 0 },
+  { 0xB70FBB5A, 0xB70FBB5A }, { 0xB60B60B6, 0xB60B60B6 },
+  { 0xB509E68B, 0 },          { 0xB40B40B4, 0xB40B40B4 },
+  { 0xB30F6353, 0 },          { 0xB21642C9, 0 },
+  { 0xB11FD3B8, 0xB11FD3B8 }, { 0xB02C0B03, 0 },
+  { 0xAF3ADDC7, 0 },          { 0xAE4C415D, 0 },
+  { 0xAD602B58, 0xAD602B58 }, { 0xAC769184, 0xAC769184 },
+  { 0xAB8F69E3, 0 },          { 0xAAAAAAAB, 0 },
+  { 0xA9C84A48, 0 },          { 0xA8E83F57, 0xA8E83F57 },
+  { 0xA80A80A8, 0xA80A80A8 }, { 0xA72F053A, 0 },
+  { 0xA655C439, 0xA655C439 }, { 0xA57EB503, 0 },
+  { 0xA4A9CF1E, 0 },          { 0xA3D70A3E, 0 },
+  { 0xA3065E40, 0 },          { 0xA237C32B, 0xA237C32B },
+  { 0xA16B312F, 0 },          { 0xA0A0A0A1, 0 },
+  { 0x9FD809FE, 0 },          { 0x9F1165E7, 0x9F1165E7 },
+  { 0x9E4CAD24, 0 },          { 0x9D89D89E, 0 },
+  { 0x9CC8E161, 0 },          { 0x9C09C09C, 0x9C09C09C },
+  { 0x9B4C6F9F, 0 },          { 0x9A90E7D9, 0x9A90E7D9 },
+  { 0x99D722DB, 0 },          { 0x991F1A51, 0x991F1A51 },
+  { 0x9868C80A, 0 },          { 0x97B425ED, 0x97B425ED },
+  { 0x97012E02, 0x97012E02 }, { 0x964FDA6C, 0x964FDA6C },
+  { 0x95A02568, 0x95A02568 }, { 0x94F2094F, 0x94F2094F },
+  { 0x94458094, 0x94458094 }, { 0x939A85C4, 0x939A85C4 },
+  { 0x92F11384, 0x92F11384 }, { 0x92492492, 0x92492492 },
+  { 0x91A2B3C5, 0 },          { 0x90FDBC09, 0x90FDBC09 },
+  { 0x905A3863, 0x905A3863 }, { 0x8FB823EE, 0x8FB823EE },
+  { 0x8F1779DA, 0 },          { 0x8E78356D, 0x8E78356D },
+  { 0x8DDA5202, 0x8DDA5202 }, { 0x8D3DCB09, 0 },
+  { 0x8CA29C04, 0x8CA29C04 }, { 0x8C08C08C, 0x8C08C08C },
+  { 0x8B70344A, 0x8B70344A }, { 0x8AD8F2FC, 0 },
+  { 0x8A42F870, 0x8A42F870 }, { 0x89AE408A, 0 },
+  { 0x891AC73B, 0 },          { 0x88888889, 0 },
+  { 0x87F78088, 0 },          { 0x8767AB5F, 0x8767AB5F },
+  { 0x86D90545, 0 },          { 0x864B8A7E, 0 },
+  { 0x85BF3761, 0x85BF3761 }, { 0x85340853, 0x85340853 },
+  { 0x84A9F9C8, 0x84A9F9C8 }, { 0x84210842, 0x84210842 },
+  { 0x83993052, 0x83993052 }, { 0x83126E98, 0 },
+  { 0x828CBFBF, 0 },          { 0x82082082, 0x82082082 },
+  { 0x81848DA9, 0 },          { 0x81020408, 0x81020408 },
+  { 0x80808081, 0 },          { 0xFFFFFFFF, 0xFFFFFFFF },
+  { 0xFF00FF01, 0 },          { 0xFE03F810, 0 },
+  { 0xFD08E551, 0 },          { 0xFC0FC0FD, 0 },
+  { 0xFB188566, 0 },          { 0xFA232CF3, 0 },
+  { 0xF92FB222, 0 },          { 0xF83E0F84, 0 },
+  { 0xF74E3FC3, 0 },          { 0xF6603D99, 0 },
+  { 0xF57403D6, 0 },          { 0xF4898D60, 0 },
+  { 0xF3A0D52D, 0 },          { 0xF2B9D649, 0 },
+  { 0xF1D48BCF, 0 },          { 0xF0F0F0F1, 0 },
+  { 0xF00F00F0, 0xF00F00F0 }, { 0xEF2EB720, 0 },
+  { 0xEE500EE5, 0xEE500EE5 }, { 0xED7303B6, 0 },
+  { 0xEC979119, 0 },          { 0xEBBDB2A6, 0 },
+  { 0xEAE56404, 0 },          { 0xEA0EA0EA, 0xEA0EA0EA },
+  { 0xE9396520, 0 },          { 0xE865AC7C, 0 },
+  { 0xE79372E3, 0 },          { 0xE6C2B449, 0 },
+  { 0xE5F36CB0, 0xE5F36CB0 }, { 0xE525982B, 0 },
+  { 0xE45932D8, 0 },          { 0xE38E38E4, 0 },
+  { 0xE2C4A689, 0 },          { 0xE1FC780F, 0 },
+  { 0xE135A9CA, 0 },          { 0xE070381C, 0xE070381C },
+  { 0xDFAC1F75, 0 },          { 0xDEE95C4D, 0 },
+  { 0xDE27EB2D, 0 },          { 0xDD67C8A6, 0xDD67C8A6 },
+  { 0xDCA8F159, 0 },          { 0xDBEB61EF, 0 },
+  { 0xDB2F171E, 0 },          { 0xDA740DA8, 0 },
+  { 0xD9BA4257, 0 },          { 0xD901B204, 0 },
+  { 0xD84A598F, 0 },          { 0xD79435E5, 0xD79435E5 },
+  { 0xD6DF43FD, 0 },          { 0xD62B80D7, 0 },
+  { 0xD578E97D, 0 },          { 0xD4C77B04, 0 },
+  { 0xD417328A, 0 },          { 0xD3680D37, 0 },
+  { 0xD2BA083C, 0 },          { 0xD20D20D2, 0xD20D20D2 },
+  { 0xD161543E, 0xD161543E }, { 0xD0B69FCC, 0 },
+  { 0xD00D00D0, 0xD00D00D0 }, { 0xCF6474A9, 0 },
+  { 0xCEBCF8BC, 0 },          { 0xCE168A77, 0xCE168A77 },
+  { 0xCD712753, 0 },          { 0xCCCCCCCD, 0 },
+  { 0xCC29786D, 0 },          { 0xCB8727C1, 0 },
+  { 0xCAE5D85F, 0xCAE5D85F }, { 0xCA4587E7, 0 },
+  { 0xC9A633FD, 0 },          { 0xC907DA4F, 0 },
+  { 0xC86A7890, 0xC86A7890 }, { 0xC7CE0C7D, 0 },
+  { 0xC73293D8, 0 },          { 0xC6980C6A, 0 },
+  { 0xC5FE7403, 0xC5FE7403 }, { 0xC565C87C, 0 },
+  { 0xC4CE07B0, 0xC4CE07B0 }, { 0xC4372F86, 0 },
+  { 0xC3A13DE6, 0xC3A13DE6 }, { 0xC30C30C3, 0xC30C30C3 },
+  { 0xC2780614, 0 },          { 0xC1E4BBD6, 0 },
+  { 0xC152500C, 0xC152500C }, { 0xC0C0C0C1, 0 },
+  { 0xC0300C03, 0xC0300C03 }, { 0xBFA02FE8, 0xBFA02FE8 },
+  { 0xBF112A8B, 0 },          { 0xBE82FA0C, 0 },
+  { 0xBDF59C92, 0 },          { 0xBD691047, 0xBD691047 },
+  { 0xBCDD535E, 0 },          { 0xBC52640C, 0 },
+  { 0xBBC8408D, 0 },          { 0xBB3EE722, 0 },
+  { 0xBAB65610, 0xBAB65610 }, { 0xBA2E8BA3, 0 },
+  { 0xB9A7862A, 0xB9A7862A }, { 0xB92143FA, 0xB92143FA },
+  { 0xB89BC36D, 0 },          { 0xB81702E1, 0 },
+  { 0xB79300B8, 0 },          { 0xB70FBB5A, 0xB70FBB5A },
+  { 0xB68D3134, 0xB68D3134 }, { 0xB60B60B6, 0xB60B60B6 },
+  { 0xB58A4855, 0xB58A4855 }, { 0xB509E68B, 0 },
+  { 0xB48A39D4, 0xB48A39D4 }, { 0xB40B40B4, 0xB40B40B4 },
+  { 0xB38CF9B0, 0xB38CF9B0 }, { 0xB30F6353, 0 },
+  { 0xB2927C2A, 0 },          { 0xB21642C9, 0 },
+  { 0xB19AB5C5, 0 },          { 0xB11FD3B8, 0xB11FD3B8 },
+  { 0xB0A59B42, 0 },          { 0xB02C0B03, 0 },
+  { 0xAFB321A1, 0xAFB321A1 }, { 0xAF3ADDC7, 0 },
+  { 0xAEC33E20, 0 },          { 0xAE4C415D, 0 },
+  { 0xADD5E632, 0xADD5E632 }, { 0xAD602B58, 0xAD602B58 },
+  { 0xACEB0F89, 0xACEB0F89 }, { 0xAC769184, 0xAC769184 },
+  { 0xAC02B00B, 0 },          { 0xAB8F69E3, 0 },
+  { 0xAB1CBDD4, 0 },          { 0xAAAAAAAB, 0 },
+  { 0xAA392F36, 0 },          { 0xA9C84A48, 0 },
+  { 0xA957FAB5, 0xA957FAB5 }, { 0xA8E83F57, 0xA8E83F57 },
+  { 0xA8791709, 0 },          { 0xA80A80A8, 0xA80A80A8 },
+  { 0xA79C7B17, 0 },          { 0xA72F053A, 0 },
+  { 0xA6C21DF7, 0 },          { 0xA655C439, 0xA655C439 },
+  { 0xA5E9F6ED, 0xA5E9F6ED }, { 0xA57EB503, 0 },
+  { 0xA513FD6C, 0 },          { 0xA4A9CF1E, 0 },
+  { 0xA4402910, 0xA4402910 }, { 0xA3D70A3E, 0 },
+  { 0xA36E71A3, 0 },          { 0xA3065E40, 0 },
+  { 0xA29ECF16, 0xA29ECF16 }, { 0xA237C32B, 0xA237C32B },
+  { 0xA1D13986, 0 },          { 0xA16B312F, 0 },
+  { 0xA105A933, 0 },          { 0xA0A0A0A1, 0 },
+  { 0xA03C1689, 0 },          { 0x9FD809FE, 0 },
+  { 0x9F747A15, 0x9F747A15 }, { 0x9F1165E7, 0x9F1165E7 },
+  { 0x9EAECC8D, 0x9EAECC8D }, { 0x9E4CAD24, 0 },
+  { 0x9DEB06C9, 0x9DEB06C9 }, { 0x9D89D89E, 0 },
+  { 0x9D2921C4, 0 },          { 0x9CC8E161, 0 },
+  { 0x9C69169B, 0x9C69169B }, { 0x9C09C09C, 0x9C09C09C },
+  { 0x9BAADE8E, 0x9BAADE8E }, { 0x9B4C6F9F, 0 },
+  { 0x9AEE72FD, 0 },          { 0x9A90E7D9, 0x9A90E7D9 },
+  { 0x9A33CD67, 0x9A33CD67 }, { 0x99D722DB, 0 },
+  { 0x997AE76B, 0x997AE76B }, { 0x991F1A51, 0x991F1A51 },
+  { 0x98C3BAC7, 0x98C3BAC7 }, { 0x9868C80A, 0 },
+  { 0x980E4156, 0x980E4156 }, { 0x97B425ED, 0x97B425ED },
+  { 0x975A7510, 0 },          { 0x97012E02, 0x97012E02 },
+  { 0x96A8500A, 0 },          { 0x964FDA6C, 0x964FDA6C },
+  { 0x95F7CC73, 0 },          { 0x95A02568, 0x95A02568 },
+  { 0x9548E498, 0 },          { 0x94F2094F, 0x94F2094F },
+  { 0x949B92DE, 0 },          { 0x94458094, 0x94458094 },
+  { 0x93EFD1C5, 0x93EFD1C5 }, { 0x939A85C4, 0x939A85C4 },
+  { 0x93459BE7, 0 },          { 0x92F11384, 0x92F11384 },
+  { 0x929CEBF5, 0 },          { 0x92492492, 0x92492492 },
+  { 0x91F5BCB9, 0 },          { 0x91A2B3C5, 0 },
+  { 0x91500915, 0x91500915 }, { 0x90FDBC09, 0x90FDBC09 },
+  { 0x90ABCC02, 0x90ABCC02 }, { 0x905A3863, 0x905A3863 },
+  { 0x90090090, 0x90090090 }, { 0x8FB823EE, 0x8FB823EE },
+  { 0x8F67A1E4, 0 },          { 0x8F1779DA, 0 },
+  { 0x8EC7AB3A, 0 },          { 0x8E78356D, 0x8E78356D },
+  { 0x8E2917E1, 0 },          { 0x8DDA5202, 0x8DDA5202 },
+  { 0x8D8BE340, 0 },          { 0x8D3DCB09, 0 },
+  { 0x8CF008CF, 0x8CF008CF }, { 0x8CA29C04, 0x8CA29C04 },
+  { 0x8C55841D, 0 },          { 0x8C08C08C, 0x8C08C08C },
+  { 0x8BBC50C9, 0 },          { 0x8B70344A, 0x8B70344A },
+  { 0x8B246A88, 0 },          { 0x8AD8F2FC, 0 },
+  { 0x8A8DCD20, 0 },          { 0x8A42F870, 0x8A42F870 },
+  { 0x89F8746A, 0 },          { 0x89AE408A, 0 },
+  { 0x89645C4F, 0x89645C4F }, { 0x891AC73B, 0 },
+  { 0x88D180CD, 0x88D180CD }, { 0x88888889, 0 },
+  { 0x883FDDF0, 0x883FDDF0 }, { 0x87F78088, 0 },
+  { 0x87AF6FD6, 0 },          { 0x8767AB5F, 0x8767AB5F },
+  { 0x872032AC, 0x872032AC }, { 0x86D90545, 0 },
+  { 0x869222B2, 0 },          { 0x864B8A7E, 0 },
+  { 0x86053C34, 0x86053C34 }, { 0x85BF3761, 0x85BF3761 },
+  { 0x85797B91, 0x85797B91 }, { 0x85340853, 0x85340853 },
+  { 0x84EEDD36, 0 },          { 0x84A9F9C8, 0x84A9F9C8 },
+  { 0x84655D9C, 0 },          { 0x84210842, 0x84210842 },
+  { 0x83DCF94E, 0 },          { 0x83993052, 0x83993052 },
+  { 0x8355ACE4, 0 },          { 0x83126E98, 0 },
+  { 0x82CF7504, 0 },          { 0x828CBFBF, 0 },
+  { 0x824A4E61, 0 },          { 0x82082082, 0x82082082 },
+  { 0x81C635BC, 0x81C635BC }, { 0x81848DA9, 0 },
+  { 0x814327E4, 0 },          { 0x81020408, 0x81020408 },
+  { 0x80C121B3, 0 },          { 0x80808081, 0 },
+  { 0x80402010, 0x80402010 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+  { 0xFF803FE1, 0 },          { 0xFF00FF01, 0 },
+  { 0xFE823CA6, 0 },          { 0xFE03F810, 0 },
+  { 0xFD863087, 0 },          { 0xFD08E551, 0 },
+  { 0xFC8C15B5, 0 },          { 0xFC0FC0FD, 0 },
+  { 0xFB93E673, 0 },          { 0xFB188566, 0 },
+  { 0xFA9D9D20, 0 },          { 0xFA232CF3, 0 },
+  { 0xF9A9342D, 0 },          { 0xF92FB222, 0 },
+  { 0xF8B6A622, 0xF8B6A622 }, { 0xF83E0F84, 0 },
+  { 0xF7C5ED9D, 0 },          { 0xF74E3FC3, 0 },
+  { 0xF6D7054E, 0 },          { 0xF6603D99, 0 },
+  { 0xF5E9E7FD, 0 },          { 0xF57403D6, 0 },
+  { 0xF4FE9083, 0 },          { 0xF4898D60, 0 },
+  { 0xF414F9CE, 0 },          { 0xF3A0D52D, 0 },
+  { 0xF32D1EE0, 0 },          { 0xF2B9D649, 0 },
+  { 0xF246FACC, 0 },          { 0xF1D48BCF, 0 },
+  { 0xF16288B9, 0 },          { 0xF0F0F0F1, 0 },
+  { 0xF07FC3E0, 0xF07FC3E0 }, { 0xF00F00F0, 0xF00F00F0 },
+  { 0xEF9EA78C, 0 },          { 0xEF2EB720, 0 },
+  { 0xEEBF2F19, 0 },          { 0xEE500EE5, 0xEE500EE5 },
+  { 0xEDE155F4, 0 },          { 0xED7303B6, 0 },
+  { 0xED05179C, 0xED05179C }, { 0xEC979119, 0 },
+  { 0xEC2A6FA0, 0xEC2A6FA0 }, { 0xEBBDB2A6, 0 },
+  { 0xEB5159A0, 0 },          { 0xEAE56404, 0 },
+  { 0xEA79D14A, 0 },          { 0xEA0EA0EA, 0xEA0EA0EA },
+  { 0xE9A3D25E, 0xE9A3D25E }, { 0xE9396520, 0 },
+  { 0xE8CF58AB, 0 },          { 0xE865AC7C, 0 },
+  { 0xE7FC600F, 0 },          { 0xE79372E3, 0 },
+  { 0xE72AE476, 0 },          { 0xE6C2B449, 0 },
+  { 0xE65AE1DC, 0 },          { 0xE5F36CB0, 0xE5F36CB0 },
+  { 0xE58C544A, 0 },          { 0xE525982B, 0 },
+  { 0xE4BF37D9, 0 },          { 0xE45932D8, 0 },
+  { 0xE3F388AF, 0 },          { 0xE38E38E4, 0 },
+  { 0xE32942FF, 0 },          { 0xE2C4A689, 0 },
+  { 0xE260630B, 0 },          { 0xE1FC780F, 0 },
+  { 0xE198E520, 0 },          { 0xE135A9CA, 0 },
+  { 0xE0D2C59A, 0 },          { 0xE070381C, 0xE070381C },
+  { 0xE00E00E0, 0xE00E00E0 }, { 0xDFAC1F75, 0 },
+  { 0xDF4A9369, 0 },          { 0xDEE95C4D, 0 },
+  { 0xDE8879B3, 0 },          { 0xDE27EB2D, 0 },
+  { 0xDDC7B04D, 0 },          { 0xDD67C8A6, 0xDD67C8A6 },
+  { 0xDD0833CE, 0 },          { 0xDCA8F159, 0 },
+  { 0xDC4A00DD, 0 },          { 0xDBEB61EF, 0 },
+  { 0xDB8D1428, 0 },          { 0xDB2F171E, 0 },
+  { 0xDAD16A6B, 0 },          { 0xDA740DA8, 0 },
+  { 0xDA17006D, 0xDA17006D }, { 0xD9BA4257, 0 },
+  { 0xD95DD300, 0 },          { 0xD901B204, 0 },
+  { 0xD8A5DEFF, 0 },          { 0xD84A598F, 0 },
+  { 0xD7EF2152, 0 },          { 0xD79435E5, 0xD79435E5 },
+  { 0xD73996E9, 0 },          { 0xD6DF43FD, 0 },
+  { 0xD6853CC1, 0 },          { 0xD62B80D7, 0 },
+  { 0xD5D20FDF, 0 },          { 0xD578E97D, 0 },
+  { 0xD5200D52, 0xD5200D52 }, { 0xD4C77B04, 0 },
+  { 0xD46F3235, 0 },          { 0xD417328A, 0 },
+  { 0xD3BF7BA9, 0 },          { 0xD3680D37, 0 },
+  { 0xD310E6DB, 0 },          { 0xD2BA083C, 0 },
+  { 0xD2637101, 0 },          { 0xD20D20D2, 0xD20D20D2 },
+  { 0xD1B71759, 0 },          { 0xD161543E, 0xD161543E },
+  { 0xD10BD72C, 0 },          { 0xD0B69FCC, 0 },
+  { 0xD061ADCA, 0 },          { 0xD00D00D0, 0xD00D00D0 },
+  { 0xCFB8988C, 0 },          { 0xCF6474A9, 0 },
+  { 0xCF1094D4, 0 },          { 0xCEBCF8BC, 0 },
+  { 0xCE69A00D, 0 },          { 0xCE168A77, 0xCE168A77 },
+  { 0xCDC3B7A9, 0xCDC3B7A9 }, { 0xCD712753, 0 },
+  { 0xCD1ED924, 0 },          { 0xCCCCCCCD, 0 },
+  { 0xCC7B0200, 0 },          { 0xCC29786D, 0 },
+  { 0xCBD82FC7, 0 },          { 0xCB8727C1, 0 },
+  { 0xCB36600D, 0 },          { 0xCAE5D85F, 0xCAE5D85F },
+  { 0xCA95906C, 0 },          { 0xCA4587E7, 0 },
+  { 0xC9F5BE86, 0 },          { 0xC9A633FD, 0 },
+  { 0xC956E803, 0xC956E803 }, { 0xC907DA4F, 0 },
+  { 0xC8B90A96, 0 },          { 0xC86A7890, 0xC86A7890 },
+  { 0xC81C23F5, 0xC81C23F5 }, { 0xC7CE0C7D, 0 },
+  { 0xC78031E0, 0xC78031E0 }, { 0xC73293D8, 0 },
+  { 0xC6E5321D, 0 },          { 0xC6980C6A, 0 },
+  { 0xC64B2278, 0xC64B2278 }, { 0xC5FE7403, 0xC5FE7403 },
+  { 0xC5B200C6, 0 },          { 0xC565C87C, 0 },
+  { 0xC519CAE0, 0xC519CAE0 }, { 0xC4CE07B0, 0xC4CE07B0 },
+  { 0xC4827EA8, 0xC4827EA8 }, { 0xC4372F86, 0 },
+  { 0xC3EC1A06, 0 },          { 0xC3A13DE6, 0xC3A13DE6 },
+  { 0xC3569AE6, 0 },          { 0xC30C30C3, 0xC30C30C3 },
+  { 0xC2C1FF3E, 0 },          { 0xC2780614, 0 },
+  { 0xC22E4507, 0 },          { 0xC1E4BBD6, 0 },
+  { 0xC19B6A42, 0 },          { 0xC152500C, 0xC152500C },
+  { 0xC1096CF6, 0 },          { 0xC0C0C0C1, 0 },
+  { 0xC0784B2F, 0 },          { 0xC0300C03, 0xC0300C03 },
+  { 0xBFE80300, 0 },          { 0xBFA02FE8, 0xBFA02FE8 },
+  { 0xBF589280, 0 },          { 0xBF112A8B, 0 },
+  { 0xBEC9F7CE, 0 },          { 0xBE82FA0C, 0 },
+  { 0xBE3C310C, 0 },          { 0xBDF59C92, 0 },
+  { 0xBDAF3C64, 0 },          { 0xBD691047, 0xBD691047 },
+  { 0xBD231803, 0 },          { 0xBCDD535E, 0 },
+  { 0xBC97C21E, 0xBC97C21E }, { 0xBC52640C, 0 },
+  { 0xBC0D38EE, 0xBC0D38EE }, { 0xBBC8408D, 0 },
+  { 0xBB837AB1, 0 },          { 0xBB3EE722, 0 },
+  { 0xBAFA85A9, 0xBAFA85A9 }, { 0xBAB65610, 0xBAB65610 },
+  { 0xBA725820, 0xBA725820 }, { 0xBA2E8BA3, 0 },
+  { 0xB9EAF063, 0 },          { 0xB9A7862A, 0xB9A7862A },
+  { 0xB9644CC4, 0 },          { 0xB92143FA, 0xB92143FA },
+  { 0xB8DE6B9A, 0 },          { 0xB89BC36D, 0 },
+  { 0xB8594B41, 0 },          { 0xB81702E1, 0 },
+  { 0xB7D4EA19, 0xB7D4EA19 }, { 0xB79300B8, 0 },
+  { 0xB7514689, 0 },          { 0xB70FBB5A, 0xB70FBB5A },
+  { 0xB6CE5EF9, 0xB6CE5EF9 }, { 0xB68D3134, 0xB68D3134 },
+  { 0xB64C31D9, 0 },          { 0xB60B60B6, 0xB60B60B6 },
+  { 0xB5CABD9B, 0 },          { 0xB58A4855, 0xB58A4855 },
+  { 0xB54A00B5, 0xB54A00B5 }, { 0xB509E68B, 0 },
+  { 0xB4C9F9A5, 0 },          { 0xB48A39D4, 0xB48A39D4 },
+  { 0xB44AA6E9, 0xB44AA6E9 }, { 0xB40B40B4, 0xB40B40B4 },
+  { 0xB3CC0706, 0 },          { 0xB38CF9B0, 0xB38CF9B0 },
+  { 0xB34E1884, 0 },          { 0xB30F6353, 0 },
+  { 0xB2D0D9EF, 0 },          { 0xB2927C2A, 0 },
+  { 0xB25449D7, 0 },          { 0xB21642C9, 0 },
+  { 0xB1D866D1, 0xB1D866D1 }, { 0xB19AB5C5, 0 },
+  { 0xB15D2F76, 0 },          { 0xB11FD3B8, 0xB11FD3B8 },
+  { 0xB0E2A260, 0xB0E2A260 }, { 0xB0A59B42, 0 },
+  { 0xB068BE31, 0 },          { 0xB02C0B03, 0 },
+  { 0xAFEF818C, 0 },          { 0xAFB321A1, 0xAFB321A1 },
+  { 0xAF76EB19, 0 },          { 0xAF3ADDC7, 0 },
+  { 0xAEFEF982, 0 },          { 0xAEC33E20, 0 },
+  { 0xAE87AB76, 0xAE87AB76 }, { 0xAE4C415D, 0 },
+  { 0xAE10FFA9, 0 },          { 0xADD5E632, 0xADD5E632 },
+  { 0xAD9AF4D0, 0 },          { 0xAD602B58, 0xAD602B58 },
+  { 0xAD2589A4, 0 },          { 0xACEB0F89, 0xACEB0F89 },
+  { 0xACB0BCE1, 0xACB0BCE1 }, { 0xAC769184, 0xAC769184 },
+  { 0xAC3C8D4A, 0 },          { 0xAC02B00B, 0 },
+  { 0xABC8F9A0, 0xABC8F9A0 }, { 0xAB8F69E3, 0 },
+  { 0xAB5600AC, 0 },          { 0xAB1CBDD4, 0 },
+  { 0xAAE3A136, 0 },          { 0xAAAAAAAB, 0 },
+  { 0xAA71DA0D, 0 },          { 0xAA392F36, 0 },
+  { 0xAA00AA01, 0 },          { 0xA9C84A48, 0 },
+  { 0xA9900FE6, 0 },          { 0xA957FAB5, 0xA957FAB5 },
+  { 0xA9200A92, 0xA9200A92 }, { 0xA8E83F57, 0xA8E83F57 },
+  { 0xA8B098E0, 0xA8B098E0 }, { 0xA8791709, 0 },
+  { 0xA841B9AD, 0 },          { 0xA80A80A8, 0xA80A80A8 },
+  { 0xA7D36BD8, 0 },          { 0xA79C7B17, 0 },
+  { 0xA765AE44, 0 },          { 0xA72F053A, 0 },
+  { 0xA6F87FD6, 0xA6F87FD6 }, { 0xA6C21DF7, 0 },
+  { 0xA68BDF79, 0 },          { 0xA655C439, 0xA655C439 },
+  { 0xA61FCC16, 0xA61FCC16 }, { 0xA5E9F6ED, 0xA5E9F6ED },
+  { 0xA5B4449D, 0 },          { 0xA57EB503, 0 },
+  { 0xA54947FE, 0 },          { 0xA513FD6C, 0 },
+  { 0xA4DED52C, 0xA4DED52C }, { 0xA4A9CF1E, 0 },
+  { 0xA474EB1F, 0xA474EB1F }, { 0xA4402910, 0xA4402910 },
+  { 0xA40B88D0, 0 },          { 0xA3D70A3E, 0 },
+  { 0xA3A2AD39, 0xA3A2AD39 }, { 0xA36E71A3, 0 },
+  { 0xA33A575A, 0xA33A575A }, { 0xA3065E40, 0 },
+  { 0xA2D28634, 0 },          { 0xA29ECF16, 0xA29ECF16 },
+  { 0xA26B38C9, 0 },          { 0xA237C32B, 0xA237C32B },
+  { 0xA2046E1F, 0xA2046E1F }, { 0xA1D13986, 0 },
+  { 0xA19E2540, 0 },          { 0xA16B312F, 0 },
+  { 0xA1385D35, 0 },          { 0xA105A933, 0 },
+  { 0xA0D3150C, 0 },          { 0xA0A0A0A1, 0 },
+  { 0xA06E4BD4, 0xA06E4BD4 }, { 0xA03C1689, 0 },
+  { 0xA00A00A0, 0xA00A00A0 }, { 0x9FD809FE, 0 },
+  { 0x9FA63284, 0 },          { 0x9F747A15, 0x9F747A15 },
+  { 0x9F42E095, 0x9F42E095 }, { 0x9F1165E7, 0x9F1165E7 },
+  { 0x9EE009EE, 0x9EE009EE }, { 0x9EAECC8D, 0x9EAECC8D },
+  { 0x9E7DADA9, 0 },          { 0x9E4CAD24, 0 },
+  { 0x9E1BCAE3, 0 },          { 0x9DEB06C9, 0x9DEB06C9 },
+  { 0x9DBA60BB, 0x9DBA60BB }, { 0x9D89D89E, 0 },
+  { 0x9D596E54, 0x9D596E54 }, { 0x9D2921C4, 0 },
+  { 0x9CF8F2D1, 0x9CF8F2D1 }, { 0x9CC8E161, 0 },
+  { 0x9C98ED58, 0 },          { 0x9C69169B, 0x9C69169B },
+  { 0x9C395D10, 0x9C395D10 }, { 0x9C09C09C, 0x9C09C09C },
+  { 0x9BDA4124, 0x9BDA4124 }, { 0x9BAADE8E, 0x9BAADE8E },
+  { 0x9B7B98C0, 0 },          { 0x9B4C6F9F, 0 },
+  { 0x9B1D6311, 0x9B1D6311 }, { 0x9AEE72FD, 0 },
+  { 0x9ABF9F48, 0x9ABF9F48 }, { 0x9A90E7D9, 0x9A90E7D9 },
+  { 0x9A624C97, 0 },          { 0x9A33CD67, 0x9A33CD67 },
+  { 0x9A056A31, 0 },          { 0x99D722DB, 0 },
+  { 0x99A8F74C, 0 },          { 0x997AE76B, 0x997AE76B },
+  { 0x994CF320, 0x994CF320 }, { 0x991F1A51, 0x991F1A51 },
+  { 0x98F15CE7, 0 },          { 0x98C3BAC7, 0x98C3BAC7 },
+  { 0x989633DB, 0x989633DB }, { 0x9868C80A, 0 },
+  { 0x983B773B, 0 },          { 0x980E4156, 0x980E4156 },
+  { 0x97E12644, 0x97E12644 }, { 0x97B425ED, 0x97B425ED },
+  { 0x97874039, 0 },          { 0x975A7510, 0 },
+  { 0x972DC45B, 0 },          { 0x97012E02, 0x97012E02 },
+  { 0x96D4B1EF, 0 },          { 0x96A8500A, 0 },
+  { 0x967C083B, 0 },          { 0x964FDA6C, 0x964FDA6C },
+  { 0x9623C686, 0x9623C686 }, { 0x95F7CC73, 0 },
+  { 0x95CBEC1B, 0 },          { 0x95A02568, 0x95A02568 },
+  { 0x95747844, 0 },          { 0x9548E498, 0 },
+  { 0x951D6A4E, 0 },          { 0x94F2094F, 0x94F2094F },
+  { 0x94C6C187, 0 },          { 0x949B92DE, 0 },
+  { 0x94707D3F, 0 },          { 0x94458094, 0x94458094 },
+  { 0x941A9CC8, 0x941A9CC8 }, { 0x93EFD1C5, 0x93EFD1C5 },
+  { 0x93C51F76, 0 },          { 0x939A85C4, 0x939A85C4 },
+  { 0x9370049C, 0 },          { 0x93459BE7, 0 },
+  { 0x931B4B91, 0 },          { 0x92F11384, 0x92F11384 },
+  { 0x92C6F3AC, 0x92C6F3AC }, { 0x929CEBF5, 0 },
+  { 0x9272FC48, 0x9272FC48 }, { 0x92492492, 0x92492492 },
+  { 0x921F64BF, 0 },          { 0x91F5BCB9, 0 },
+  { 0x91CC2C6C, 0x91CC2C6C }, { 0x91A2B3C5, 0 },
+  { 0x917952AF, 0 },          { 0x91500915, 0x91500915 },
+  { 0x9126D6E5, 0 },          { 0x90FDBC09, 0x90FDBC09 },
+  { 0x90D4B86F, 0 },          { 0x90ABCC02, 0x90ABCC02 },
+  { 0x9082F6B0, 0 },          { 0x905A3863, 0x905A3863 },
+  { 0x9031910A, 0 },          { 0x90090090, 0x90090090 },
+  { 0x8FE086E3, 0 },          { 0x8FB823EE, 0x8FB823EE },
+  { 0x8F8FD7A0, 0 },          { 0x8F67A1E4, 0 },
+  { 0x8F3F82A8, 0x8F3F82A8 }, { 0x8F1779DA, 0 },
+  { 0x8EEF8766, 0 },          { 0x8EC7AB3A, 0 },
+  { 0x8E9FE542, 0x8E9FE542 }, { 0x8E78356D, 0x8E78356D },
+  { 0x8E509BA8, 0x8E509BA8 }, { 0x8E2917E1, 0 },
+  { 0x8E01AA05, 0 },          { 0x8DDA5202, 0x8DDA5202 },
+  { 0x8DB30FC6, 0x8DB30FC6 }, { 0x8D8BE340, 0 },
+  { 0x8D64CC5C, 0 },          { 0x8D3DCB09, 0 },
+  { 0x8D16DF35, 0x8D16DF35 }, { 0x8CF008CF, 0x8CF008CF },
+  { 0x8CC947C5, 0 },          { 0x8CA29C04, 0x8CA29C04 },
+  { 0x8C7C057D, 0 },          { 0x8C55841D, 0 },
+  { 0x8C2F17D2, 0x8C2F17D2 }, { 0x8C08C08C, 0x8C08C08C },
+  { 0x8BE27E39, 0x8BE27E39 }, { 0x8BBC50C9, 0 },
+  { 0x8B963829, 0x8B963829 }, { 0x8B70344A, 0x8B70344A },
+  { 0x8B4A451A, 0 },          { 0x8B246A88, 0 },
+  { 0x8AFEA483, 0x8AFEA483 }, { 0x8AD8F2FC, 0 },
+  { 0x8AB355E0, 0x8AB355E0 }, { 0x8A8DCD20, 0 },
+  { 0x8A6858AB, 0 },          { 0x8A42F870, 0x8A42F870 },
+  { 0x8A1DAC60, 0x8A1DAC60 }, { 0x89F8746A, 0 },
+  { 0x89D3507D, 0 },          { 0x89AE408A, 0 },
+  { 0x89894480, 0 },          { 0x89645C4F, 0x89645C4F },
+  { 0x893F87E8, 0x893F87E8 }, { 0x891AC73B, 0 },
+  { 0x88F61A37, 0x88F61A37 }, { 0x88D180CD, 0x88D180CD },
+  { 0x88ACFAEE, 0 },          { 0x88888889, 0 },
+  { 0x8864298F, 0 },          { 0x883FDDF0, 0x883FDDF0 },
+  { 0x881BA59E, 0 },          { 0x87F78088, 0 },
+  { 0x87D36EA0, 0 },          { 0x87AF6FD6, 0 },
+  { 0x878B841B, 0 },          { 0x8767AB5F, 0x8767AB5F },
+  { 0x8743E595, 0 },          { 0x872032AC, 0x872032AC },
+  { 0x86FC9296, 0x86FC9296 }, { 0x86D90545, 0 },
+  { 0x86B58AA8, 0 },          { 0x869222B2, 0 },
+  { 0x866ECD53, 0x866ECD53 }, { 0x864B8A7E, 0 },
+  { 0x86285A23, 0x86285A23 }, { 0x86053C34, 0x86053C34 },
+  { 0x85E230A3, 0x85E230A3 }, { 0x85BF3761, 0x85BF3761 },
+  { 0x859C5060, 0x859C5060 }, { 0x85797B91, 0x85797B91 },
+  { 0x8556B8E7, 0x8556B8E7 }, { 0x85340853, 0x85340853 },
+  { 0x851169C7, 0x851169C7 }, { 0x84EEDD36, 0 },
+  { 0x84CC6290, 0 },          { 0x84A9F9C8, 0x84A9F9C8 },
+  { 0x8487A2D1, 0 },          { 0x84655D9C, 0 },
+  { 0x84432A1B, 0x84432A1B }, { 0x84210842, 0x84210842 },
+  { 0x83FEF802, 0x83FEF802 }, { 0x83DCF94E, 0 },
+  { 0x83BB0C18, 0 },          { 0x83993052, 0x83993052 },
+  { 0x837765F0, 0x837765F0 }, { 0x8355ACE4, 0 },
+  { 0x83340520, 0x83340520 }, { 0x83126E98, 0 },
+  { 0x82F0E93D, 0x82F0E93D }, { 0x82CF7504, 0 },
+  { 0x82AE11DE, 0 },          { 0x828CBFBF, 0 },
+  { 0x826B7E99, 0x826B7E99 }, { 0x824A4E61, 0 },
+  { 0x82292F08, 0 },          { 0x82082082, 0x82082082 },
+  { 0x81E722C2, 0x81E722C2 }, { 0x81C635BC, 0x81C635BC },
+  { 0x81A55963, 0 },          { 0x81848DA9, 0 },
+  { 0x8163D283, 0 },          { 0x814327E4, 0 },
+  { 0x81228DBF, 0 },          { 0x81020408, 0x81020408 },
+  { 0x80E18AB3, 0 },          { 0x80C121B3, 0 },
+  { 0x80A0C8FB, 0x80A0C8FB }, { 0x80808081, 0 },
+  { 0x80604836, 0x80604836 }, { 0x80402010, 0x80402010 },
+  { 0x80200802, 0x80200802 }, { 0xFFFFFFFF, 0xFFFFFFFF }
+};
diff --git a/av1/common/odintrin.h b/av1/common/odintrin.h
new file mode 100644
index 0000000..87b1a36
--- /dev/null
+++ b/av1/common/odintrin.h
@@ -0,0 +1,48 @@
+#ifndef VP10_COMMON_ODINTRIN_H_
+#define VP10_COMMON_ODINTRIN_H_
+
+#include "av1/common/enums.h"
+#include "aom/vpx_integer.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_ports/bitops.h"
+
+/*Smallest blocks are 4x4*/
+#define OD_LOG_BSIZE0 (2)
+/*There are 5 block sizes total (4x4, 8x8, 16x16, 32x32 and 64x64).*/
+#define OD_NBSIZES (5)
+/*The log of the maximum length of the side of a block.*/
+#define OD_LOG_BSIZE_MAX (OD_LOG_BSIZE0 + OD_NBSIZES - 1)
+/*The maximum length of the side of a block.*/
+#define OD_BSIZE_MAX (1 << OD_LOG_BSIZE_MAX)
+
+typedef int od_coeff;
+
+typedef int16_t od_dering_in;
+
+#define OD_DIVU_DMAX (1024)
+
+extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2];
+
+/*Translate unsigned division by small divisors into multiplications.*/
+#define OD_DIVU_SMALL(_x, _d)                                     \
+  ((uint32_t)((OD_DIVU_SMALL_CONSTS[(_d)-1][0] * (uint64_t)(_x) + \
+               OD_DIVU_SMALL_CONSTS[(_d)-1][1]) >>                \
+              32) >>                                              \
+   (OD_ILOG(_d) - 1))
+
+#define OD_DIVU(_x, _d) \
+  (((_d) < OD_DIVU_DMAX) ? (OD_DIVU_SMALL((_x), (_d))) : ((_x) / (_d)))
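+
+/*For illustration: OD_DIVU(x, 3) uses the constant 0xAAAAAAAB
+   (= ceil(2^33/3)) with OD_ILOG(3) - 1 = 1, i.e.
+   ((0xAAAAAAAB*(uint64_t)x) >> 32) >> 1, which equals x/3 for all 32-bit x
+   (e.g. x = 10 yields 3).*/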
+
+#define OD_MINI VPXMIN
+#define OD_CLAMPI(min, val, max) clamp((val), (min), (max))
+
+#define OD_CLZ0 (1)
+#define OD_CLZ(x) (-get_msb(x))
+#define OD_ILOG_NZ(x) (OD_CLZ0 - OD_CLZ(x))
+/*Note that __builtin_clz is not defined when x == 0, according to the gcc
+   documentation (and that of the x86 BSR instruction that implements it), so
+   we have to special-case it.
+  We define a special version of the macro to use when x can be zero.*/
+#define OD_ILOG(x) ((x) ? OD_ILOG_NZ(x) : 0)
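+
+/*OD_ILOG() is thus the number of bits needed to represent x:
+   OD_ILOG(0) = 0, OD_ILOG(1) = 1, OD_ILOG(2) = OD_ILOG(3) = 2,
+   OD_ILOG(255) = 8.*/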
+
+#endif
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
new file mode 100644
index 0000000..55a8112
--- /dev/null
+++ b/av1/common/onyxc_int.h
@@ -0,0 +1,740 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ONYXC_INT_H_
+#define VP10_COMMON_ONYXC_INT_H_
+
+#include "./vpx_config.h"
+#include "aom/internal/vpx_codec_internal.h"
+#include "aom_util/vpx_thread.h"
+#include "./vp10_rtcd.h"
+#include "av1/common/alloccommon.h"
+#include "av1/common/loopfilter.h"
+#include "av1/common/entropymv.h"
+#include "av1/common/entropy.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/mv.h"
+#include "av1/common/frame_buffers.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/tile_common.h"
+#include "av1/common/restoration.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define REF_FRAMES_LOG2 3
+#define REF_FRAMES (1 << REF_FRAMES_LOG2)
+
+// 4 scratch frames for new frames, to support a maximum of 4 cores decoding
+// in parallel, plus 3 for scaled references on the encoder.
+// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number
+// of framebuffers.
+// TODO(jkoleszar): These 3 extra references could probably come from the
+// normal reference pool.
+#define FRAME_BUFFERS (REF_FRAMES + 7)
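+// With REF_FRAMES == 8, this yields 8 + 4 + 3 == 15 buffers in total.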
+
+#if CONFIG_EXT_REFS
+#define FRAME_CONTEXTS_LOG2 3
+#else
+#define FRAME_CONTEXTS_LOG2 2
+#endif
+
+#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2)
+
+#define NUM_PING_PONG_BUFFERS 2
+
+typedef enum {
+  SINGLE_REFERENCE = 0,
+  COMPOUND_REFERENCE = 1,
+  REFERENCE_MODE_SELECT = 2,
+  REFERENCE_MODES = 3,
+} REFERENCE_MODE;
+
+typedef enum {
+  RESET_FRAME_CONTEXT_NONE = 0,
+  RESET_FRAME_CONTEXT_CURRENT = 1,
+  RESET_FRAME_CONTEXT_ALL = 2,
+} RESET_FRAME_CONTEXT_MODE;
+
+typedef enum {
+  /**
+   * Update frame context to values resulting from forward probability
+   * updates signaled in the frame header
+   */
+  REFRESH_FRAME_CONTEXT_FORWARD,
+  /**
+   * Update frame context to values resulting from backward probability
+   * updates based on entropy/counts in the decoded frame
+   */
+  REFRESH_FRAME_CONTEXT_BACKWARD,
+} REFRESH_FRAME_CONTEXT_MODE;
+
+typedef struct {
+  int_mv mv[2];
+  MV_REFERENCE_FRAME ref_frame[2];
+} MV_REF;
+
+typedef struct {
+  int ref_count;
+  MV_REF *mvs;
+  int mi_rows;
+  int mi_cols;
+  vpx_codec_frame_buffer_t raw_frame_buffer;
+  YV12_BUFFER_CONFIG buf;
+
+  // The following variables are only used in frame parallel decode.
+
+  // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means
+  // that no FrameWorker owns, or is decoding, this buffer.
+  VPxWorker *frame_worker_owner;
+
+  // row and col indicate the position to which the frame has been decoded, in
+  // real pixel units. They are reset to -1 when decoding begins and set to
+  // INT_MAX when the frame is fully decoded.
+  int row;
+  int col;
+} RefCntBuffer;
+
+typedef struct BufferPool {
+// Protect BufferPool from being accessed by several FrameWorkers at
+// the same time during frame parallel decode.
+// TODO(hkuang): Try to use atomic variable instead of locking the whole pool.
+#if CONFIG_MULTITHREAD
+  pthread_mutex_t pool_mutex;
+#endif
+
+  // Private data associated with the frame buffer callbacks.
+  void *cb_priv;
+
+  vpx_get_frame_buffer_cb_fn_t get_fb_cb;
+  vpx_release_frame_buffer_cb_fn_t release_fb_cb;
+
+  RefCntBuffer frame_bufs[FRAME_BUFFERS];
+
+  // Frame buffers allocated internally by the codec.
+  InternalFrameBufferList int_frame_buffers;
+} BufferPool;
+
+typedef struct VP10Common {
+  struct vpx_internal_error_info error;
+  vpx_color_space_t color_space;
+  int color_range;
+  int width;
+  int height;
+  int render_width;
+  int render_height;
+  int last_width;
+  int last_height;
+
+  // TODO(jkoleszar): this implies chroma ss right now, but could vary per
+  // plane. Revisit as part of the future change to YV12_BUFFER_CONFIG to
+  // support additional planes.
+  int subsampling_x;
+  int subsampling_y;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  // Marks if we need to use 16bit frame buffers (1: yes, 0: no).
+  int use_highbitdepth;
+#endif
+#if CONFIG_CLPF
+  int clpf;
+#endif
+
+  YV12_BUFFER_CONFIG *frame_to_show;
+  RefCntBuffer *prev_frame;
+
+  // TODO(hkuang): Combine this with cur_buf in macroblockd.
+  RefCntBuffer *cur_frame;
+
+  int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */
+
+  // Prepare ref_frame_map for the next frame.
+  // Only used in frame parallel decode.
+  int next_ref_frame_map[REF_FRAMES];
+
+  // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and
+  // roll new_fb_idx into it.
+
+  // Each Inter frame can reference INTER_REFS_PER_FRAME buffers
+  RefBuffer frame_refs[INTER_REFS_PER_FRAME];
+
+  int new_fb_idx;
+
+  FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/
+#if CONFIG_EXT_REFS
+  // frame type of the frame before last frame
+  FRAME_TYPE last2_frame_type;
+  // TODO(zoeliu): To check whether last3_frame_type is still needed.
+  // frame type of the frame two frames before last frame
+  FRAME_TYPE last3_frame_type;
+#endif  // CONFIG_EXT_REFS
+  FRAME_TYPE frame_type;
+
+  int show_frame;
+  int last_show_frame;
+  int show_existing_frame;
+#if CONFIG_EXT_REFS
+  // Flag for a frame used as a reference - not written to the bitstream
+  int is_reference_frame;
+#endif  // CONFIG_EXT_REFS
+
+  // Flag signaling that the frame is encoded using only INTRA modes.
+  uint8_t intra_only;
+  uint8_t last_intra_only;
+
+  int allow_high_precision_mv;
+
+  int allow_screen_content_tools;
+
+  // Flag signaling which frame contexts should be reset to default values.
+  RESET_FRAME_CONTEXT_MODE reset_frame_context;
+
+  // MBs and mb_rows/cols are in 16-pixel units; mi_rows/cols are in
+  // MODE_INFO (8-pixel) units.
+  int MBs;
+  int mb_rows, mi_rows;
+  int mb_cols, mi_cols;
+  int mi_stride;
+
+  /* profile settings */
+  TX_MODE tx_mode;
+
+  int base_qindex;
+  int y_dc_delta_q;
+  int uv_dc_delta_q;
+  int uv_ac_delta_q;
+  int16_t y_dequant[MAX_SEGMENTS][2];
+  int16_t uv_dequant[MAX_SEGMENTS][2];
+
+#if CONFIG_AOM_QM
+  // Global quant matrix tables
+  qm_val_t *giqmatrix[NUM_QM_LEVELS][2][2][TX_SIZES];
+  qm_val_t *gqmatrix[NUM_QM_LEVELS][2][2][TX_SIZES];
+
+  // Local quant matrix tables for each frame
+  qm_val_t *y_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
+  qm_val_t *uv_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
+  // Encoder
+  qm_val_t *y_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
+  qm_val_t *uv_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
+
+  int using_qmatrix;
+  int min_qmlevel;
+  int max_qmlevel;
+#endif
+#if CONFIG_NEW_QUANT
+  dequant_val_type_nuq y_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
+  dequant_val_type_nuq uv_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
+#endif
+
+  /* We allocate a MODE_INFO struct for each macroblock, together with
+     an extra row on top and column on the left to simplify prediction. */
+  int mi_alloc_size;
+  MODE_INFO *mip; /* Base of allocated array */
+  MODE_INFO *mi;  /* Corresponds to upper left visible macroblock */
+
+  // TODO(agrange): Move prev_mi into encoder structure.
+  // prev_mip and prev_mi will only be allocated in encoder.
+  MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
+  MODE_INFO *prev_mi;  /* 'mi' from last frame (points into prev_mip) */
+
+  // Separate mi functions between encoder and decoder.
+  int (*alloc_mi)(struct VP10Common *cm, int mi_size);
+  void (*free_mi)(struct VP10Common *cm);
+  void (*setup_mi)(struct VP10Common *cm);
+
+  // Grid of pointers to 8x8 MODE_INFO structs.  Any 8x8 not in the visible
+  // area will be NULL.
+  MODE_INFO **mi_grid_base;
+  MODE_INFO **mi_grid_visible;
+  MODE_INFO **prev_mi_grid_base;
+  MODE_INFO **prev_mi_grid_visible;
+
+  // Whether to use previous frame's motion vectors for prediction.
+  int use_prev_frame_mvs;
+
+  // Persistent mb segment id map used in prediction.
+  int seg_map_idx;
+  int prev_seg_map_idx;
+
+  uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS];
+  uint8_t *last_frame_seg_map;
+  uint8_t *current_frame_seg_map;
+  int seg_map_alloc_size;
+
+  INTERP_FILTER interp_filter;
+
+  loop_filter_info_n lf_info;
+#if CONFIG_LOOP_RESTORATION
+  RestorationInfo rst_info;
+  RestorationInternal rst_internal;
+#endif  // CONFIG_LOOP_RESTORATION
+
+  // Flag signaling how frame contexts should be updated at the end of
+  // a frame decode
+  REFRESH_FRAME_CONTEXT_MODE refresh_frame_context;
+
+  int ref_frame_sign_bias[TOTAL_REFS_PER_FRAME]; /* Two state 0, 1 */
+
+  struct loopfilter lf;
+  struct segmentation seg;
+
+  int frame_parallel_decode;  // frame-based threading.
+
+// Context probabilities for reference frame prediction
+#if CONFIG_EXT_REFS
+  MV_REFERENCE_FRAME comp_fwd_ref[FWD_REFS];
+  MV_REFERENCE_FRAME comp_bwd_ref[BWD_REFS];
+#else
+  MV_REFERENCE_FRAME comp_fixed_ref;
+  MV_REFERENCE_FRAME comp_var_ref[COMP_REFS];
+#endif  // CONFIG_EXT_REFS
+  REFERENCE_MODE reference_mode;
+
+  FRAME_CONTEXT *fc;              /* this frame entropy */
+  FRAME_CONTEXT *frame_contexts;  // Array of size FRAME_CONTEXTS
+  unsigned int frame_context_idx; /* Context to use/update */
+  FRAME_COUNTS counts;
+
+#if CONFIG_ENTROPY
+  // The initial probabilities for a frame, before any subframe backward update,
+  // and after forward update.
+  vp10_coeff_probs_model starting_coef_probs[TX_SIZES][PLANE_TYPES];
+  // Number of subframe backward updates already done
+  uint8_t coef_probs_update_idx;
+  // Signal if the backward update is subframe or end-of-frame
+  uint8_t partial_prob_update;
+  // Frame level flag to turn on/off subframe backward update
+  uint8_t do_subframe_update;
+#endif  // CONFIG_ENTROPY
+
+  unsigned int current_video_frame;
+  BITSTREAM_PROFILE profile;
+
+  // VPX_BITS_8 in profile 0 or 1, VPX_BITS_10 or VPX_BITS_12 in profile 2 or 3.
+  vpx_bit_depth_t bit_depth;
+  vpx_bit_depth_t dequant_bit_depth;  // bit_depth of current dequantizer
+
+  int error_resilient_mode;
+
+#if !CONFIG_EXT_TILE
+  int log2_tile_cols, log2_tile_rows;
+#endif  // !CONFIG_EXT_TILE
+  int tile_cols, tile_rows;
+  int tile_width, tile_height;  // In MI units
+
+  int byte_alignment;
+  int skip_loop_filter;
+
+  // Private data associated with the frame buffer callbacks.
+  void *cb_priv;
+  vpx_get_frame_buffer_cb_fn_t get_fb_cb;
+  vpx_release_frame_buffer_cb_fn_t release_fb_cb;
+
+  // Handles memory for the codec.
+  InternalFrameBufferList int_frame_buffers;
+
+  // External BufferPool passed from outside.
+  BufferPool *buffer_pool;
+
+  PARTITION_CONTEXT *above_seg_context;
+  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
+#if CONFIG_VAR_TX
+  TXFM_CONTEXT *above_txfm_context;
+  TXFM_CONTEXT left_txfm_context[MAX_MIB_SIZE];
+#endif
+  int above_context_alloc_cols;
+
+  // Scratch memory for intraonly/keyframe forward updates from default tables.
+  // This is intentionally not placed in FRAME_CONTEXT since it is reset upon
+  // each keyframe and not used afterwards.
+  vpx_prob kf_y_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1];
+#if CONFIG_GLOBAL_MOTION
+  Global_Motion_Params global_motion[TOTAL_REFS_PER_FRAME];
+#endif
+
+  BLOCK_SIZE sb_size;  // Size of the superblock used for this frame
+  int mib_size;        // Size of the superblock in units of MI blocks
+  int mib_size_log2;   // Log 2 of above.
+#if CONFIG_DERING
+  int dering_level;
+#endif
+} VP10_COMMON;
+
+// TODO(hkuang): Don't need to lock the whole pool after implementing atomic
+// frame reference count.
+static void lock_buffer_pool(BufferPool *const pool) {
+#if CONFIG_MULTITHREAD
+  pthread_mutex_lock(&pool->pool_mutex);
+#else
+  (void)pool;
+#endif
+}
+
+static void unlock_buffer_pool(BufferPool *const pool) {
+#if CONFIG_MULTITHREAD
+  pthread_mutex_unlock(&pool->pool_mutex);
+#else
+  (void)pool;
+#endif
+}
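+// Typical usage (see get_free_fb() below for the pattern): bracket any
+// read-modify-write of the shared reference counts with
+// lock_buffer_pool()/unlock_buffer_pool().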
+
+static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP10_COMMON *cm, int index) {
+  if (index < 0 || index >= REF_FRAMES) return NULL;
+  if (cm->ref_frame_map[index] < 0) return NULL;
+  assert(cm->ref_frame_map[index] < FRAME_BUFFERS);
+  return &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf;
+}
+
+static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(
+    const VP10_COMMON *const cm) {
+  return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf;
+}
+
+static INLINE int get_free_fb(VP10_COMMON *cm) {
+  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+  int i;
+
+  lock_buffer_pool(cm->buffer_pool);
+  for (i = 0; i < FRAME_BUFFERS; ++i)
+    if (frame_bufs[i].ref_count == 0) break;
+
+  if (i != FRAME_BUFFERS) {
+    frame_bufs[i].ref_count = 1;
+  } else {
+    // Reset i to be INVALID_IDX to indicate no free buffer found.
+    i = INVALID_IDX;
+  }
+
+  unlock_buffer_pool(cm->buffer_pool);
+  return i;
+}
+
+static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
+  const int ref_index = *idx;
+
+  if (ref_index >= 0 && bufs[ref_index].ref_count > 0)
+    bufs[ref_index].ref_count--;
+
+  *idx = new_idx;
+
+  bufs[new_idx].ref_count++;
+}
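+// A sketch of typical usage, retargeting a reference slot to the new frame:
+//   ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[idx], cm->new_fb_idx);
+// This drops one count on the old buffer and takes one on the new buffer.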
+
+static INLINE int mi_cols_aligned_to_sb(const VP10_COMMON *cm) {
+  return ALIGN_POWER_OF_TWO(cm->mi_cols, cm->mib_size_log2);
+}
+
+static INLINE int mi_rows_aligned_to_sb(const VP10_COMMON *cm) {
+  return ALIGN_POWER_OF_TWO(cm->mi_rows, cm->mib_size_log2);
+}
+
+static INLINE int frame_is_intra_only(const VP10_COMMON *const cm) {
+  return cm->frame_type == KEY_FRAME || cm->intra_only;
+}
+
+static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                         tran_low_t *dqcoeff) {
+  int i;
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    xd->plane[i].dqcoeff = dqcoeff;
+    xd->above_context[i] = cm->above_context[i];
+    if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
+      memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
+#if CONFIG_AOM_QM
+      memcpy(xd->plane[i].seg_iqmatrix, cm->y_iqmatrix, sizeof(cm->y_iqmatrix));
+#endif
+
+#if CONFIG_NEW_QUANT
+      memcpy(xd->plane[i].seg_dequant_nuq, cm->y_dequant_nuq,
+             sizeof(cm->y_dequant_nuq));
+#endif
+    } else {
+      memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant));
+#if CONFIG_AOM_QM
+      memcpy(xd->plane[i].seg_iqmatrix, cm->uv_iqmatrix,
+             sizeof(cm->uv_iqmatrix));
+#endif
+#if CONFIG_NEW_QUANT
+      memcpy(xd->plane[i].seg_dequant_nuq, cm->uv_dequant_nuq,
+             sizeof(cm->uv_dequant_nuq));
+#endif
+    }
+    xd->fc = cm->fc;
+  }
+
+  xd->above_seg_context = cm->above_seg_context;
+#if CONFIG_VAR_TX
+  xd->above_txfm_context = cm->above_txfm_context;
+#endif
+  xd->mi_stride = cm->mi_stride;
+  xd->error_info = &cm->error;
+}
+
+static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
+  const int above_idx = mi_col * 2;
+  const int left_idx = (mi_row * 2) & MAX_MIB_MASK_2;
+  int i;
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    struct macroblockd_plane *const pd = &xd->plane[i];
+    pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x];
+    pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y];
+  }
+}
+
+static INLINE int calc_mi_size(int len) {
+  // len is in mi units.
+  return len + MAX_MIB_SIZE;
+}
+
+static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
+                                  int mi_row, int bh, int mi_col, int bw,
+                                  int mi_rows, int mi_cols) {
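+  // The edge distances below are in 1/8-pel units (MI_SIZE pixels times 8),
+  // matching the 1/8-pel precision of the motion vectors they bound.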
+  xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
+  xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8;
+  xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+  xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8;
+
+  // Are edges available for intra prediction?
+  xd->up_available = (mi_row > tile->mi_row_start);
+  xd->left_available = (mi_col > tile->mi_col_start);
+  if (xd->up_available) {
+    xd->above_mi = xd->mi[-xd->mi_stride];
+    // above_mi may be NULL in encoder's first pass.
+    xd->above_mbmi = xd->above_mi ? &xd->above_mi->mbmi : NULL;
+  } else {
+    xd->above_mi = NULL;
+    xd->above_mbmi = NULL;
+  }
+
+  if (xd->left_available) {
+    xd->left_mi = xd->mi[-1];
+    // left_mi may be NULL in encoder's first pass.
+    xd->left_mbmi = xd->left_mi ? &xd->left_mi->mbmi : NULL;
+  } else {
+    xd->left_mi = NULL;
+    xd->left_mbmi = NULL;
+  }
+
+  xd->n8_h = bh;
+  xd->n8_w = bw;
+#if CONFIG_REF_MV
+  xd->is_sec_rect = 0;
+  if (xd->n8_w < xd->n8_h)
+    if (mi_col & (xd->n8_h - 1)) xd->is_sec_rect = 1;
+
+  if (xd->n8_w > xd->n8_h)
+    if (mi_row & (xd->n8_w - 1)) xd->is_sec_rect = 1;
+#endif
+}
+
+static INLINE const vpx_prob *get_y_mode_probs(const VP10_COMMON *cm,
+                                               const MODE_INFO *mi,
+                                               const MODE_INFO *above_mi,
+                                               const MODE_INFO *left_mi,
+                                               int block) {
+  const PREDICTION_MODE above = vp10_above_block_mode(mi, above_mi, block);
+  const PREDICTION_MODE left = vp10_left_block_mode(mi, left_mi, block);
+  return cm->kf_y_prob[above][left];
+}
+
+static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row,
+                                            int mi_col, BLOCK_SIZE subsize,
+                                            BLOCK_SIZE bsize) {
+  PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col;
+  PARTITION_CONTEXT *const left_ctx =
+      xd->left_seg_context + (mi_row & MAX_MIB_MASK);
+
+#if CONFIG_EXT_PARTITION_TYPES
+  const int bw = num_8x8_blocks_wide_lookup[bsize];
+  const int bh = num_8x8_blocks_high_lookup[bsize];
+  memset(above_ctx, partition_context_lookup[subsize].above, bw);
+  memset(left_ctx, partition_context_lookup[subsize].left, bh);
+#else
+  // num_4x4_blocks_wide_lookup[bsize] / 2
+  const int bs = num_8x8_blocks_wide_lookup[bsize];
+
+  // Update the partition context at the end node: set the partition bits
+  // of block sizes larger than the current one to one, and the partition
+  // bits of smaller block sizes to zero.
+  memset(above_ctx, partition_context_lookup[subsize].above, bs);
+  memset(left_ctx, partition_context_lookup[subsize].left, bs);
+#endif  // CONFIG_EXT_PARTITION_TYPES
+}
+
+#if CONFIG_EXT_PARTITION_TYPES
+static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row,
+                                                int mi_col, BLOCK_SIZE subsize,
+                                                BLOCK_SIZE bsize,
+                                                PARTITION_TYPE partition) {
+  if (bsize >= BLOCK_8X8) {
+    const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
+    BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+    switch (partition) {
+      case PARTITION_SPLIT:
+        if (bsize != BLOCK_8X8) break;
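+        // Fall through for BLOCK_8X8: an 8x8 SPLIT updates the partition
+        // context the same way as the other partition types.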
+      case PARTITION_NONE:
+      case PARTITION_HORZ:
+      case PARTITION_VERT:
+        update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+        break;
+      case PARTITION_HORZ_A:
+        update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
+        update_partition_context(xd, mi_row + hbs, mi_col, subsize, subsize);
+        break;
+      case PARTITION_HORZ_B:
+        update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+        update_partition_context(xd, mi_row + hbs, mi_col, bsize2, subsize);
+        break;
+      case PARTITION_VERT_A:
+        update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
+        update_partition_context(xd, mi_row, mi_col + hbs, subsize, subsize);
+        break;
+      case PARTITION_VERT_B:
+        update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+        update_partition_context(xd, mi_row, mi_col + hbs, bsize2, subsize);
+        break;
+      default: assert(0 && "Invalid partition type");
+    }
+  }
+}
+#endif  // CONFIG_EXT_PARTITION_TYPES
+
+static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row,
+                                          int mi_col, BLOCK_SIZE bsize) {
+  const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col;
+  const PARTITION_CONTEXT *left_ctx =
+      xd->left_seg_context + (mi_row & MAX_MIB_MASK);
+  const int bsl = mi_width_log2_lookup[bsize];
+  int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1;
+
+  assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
+  assert(bsl >= 0);
+
+  return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
+}
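+// For example, for an 8x8 block (bsl == 0) the context reduces to
+// left * 2 + above, i.e. one of 0..3 depending on the neighboring bits.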
+
+static INLINE void vp10_zero_above_context(VP10_COMMON *const cm,
+                                           int mi_col_start, int mi_col_end) {
+  const int width = mi_col_end - mi_col_start;
+
+  const int offset_y = 2 * mi_col_start;
+  const int width_y = 2 * width;
+  const int offset_uv = offset_y >> cm->subsampling_x;
+  const int width_uv = width_y >> cm->subsampling_x;
+
+  vp10_zero_array(cm->above_context[0] + offset_y, width_y);
+  vp10_zero_array(cm->above_context[1] + offset_uv, width_uv);
+  vp10_zero_array(cm->above_context[2] + offset_uv, width_uv);
+
+  vp10_zero_array(cm->above_seg_context + mi_col_start, width);
+
+#if CONFIG_VAR_TX
+  vp10_zero_array(cm->above_txfm_context + mi_col_start, width);
+#endif  // CONFIG_VAR_TX
+}
+
+static INLINE void vp10_zero_left_context(MACROBLOCKD *const xd) {
+  vp10_zero(xd->left_context);
+  vp10_zero(xd->left_seg_context);
+#if CONFIG_VAR_TX
+  vp10_zero(xd->left_txfm_context_buffer);
+#endif
+}
+
+#if CONFIG_VAR_TX
+static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx, TX_SIZE tx_size,
+                                int len) {
+  int i;
+  for (i = 0; i < len; ++i) txfm_ctx[i] = tx_size;
+}
+
+static INLINE void set_txfm_ctxs(TX_SIZE tx_size, int n8_w, int n8_h,
+                                 const MACROBLOCKD *xd) {
+  set_txfm_ctx(xd->above_txfm_context, txsize_horz_map[tx_size], n8_w);
+  set_txfm_ctx(xd->left_txfm_context, txsize_vert_map[tx_size], n8_h);
+}
+
+static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx,
+                                         TXFM_CONTEXT *left_ctx,
+                                         TX_SIZE tx_size) {
+  BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+  int bs = num_8x8_blocks_high_lookup[bsize];
+  int i;
+  for (i = 0; i < bs; ++i) {
+    above_ctx[i] = tx_size;
+    left_ctx[i] = tx_size;
+  }
+}
+
+static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
+                                         TXFM_CONTEXT *left_ctx,
+                                         TX_SIZE tx_size) {
+  int above = *above_ctx < tx_size;
+  int left = *left_ctx < tx_size;
+  return (tx_size - 1) * 3 + above + left;
+}
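+// For example, when both neighbors were coded with a transform at least as
+// large as tx_size, above == left == 0 and the context is (tx_size - 1) * 3.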
+#endif
+
+static INLINE PARTITION_TYPE get_partition(const VP10_COMMON *const cm,
+                                           const int mi_row, const int mi_col,
+                                           const BLOCK_SIZE bsize) {
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
+    return PARTITION_INVALID;
+  } else {
+    const int offset = mi_row * cm->mi_stride + mi_col;
+    MODE_INFO **mi = cm->mi_grid_visible + offset;
+    const MB_MODE_INFO *const mbmi = &mi[0]->mbmi;
+    const int bsl = b_width_log2_lookup[bsize];
+    const PARTITION_TYPE partition = partition_lookup[bsl][mbmi->sb_type];
+#if !CONFIG_EXT_PARTITION_TYPES
+    return partition;
+#else
+    const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
+
+    assert(cm->mi_grid_visible[offset] == &cm->mi[offset]);
+
+    if (partition != PARTITION_NONE && bsize > BLOCK_8X8 &&
+        mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
+      const BLOCK_SIZE h = get_subsize(bsize, PARTITION_HORZ_A);
+      const BLOCK_SIZE v = get_subsize(bsize, PARTITION_VERT_A);
+      const MB_MODE_INFO *const mbmi_right = &mi[hbs]->mbmi;
+      const MB_MODE_INFO *const mbmi_below = &mi[hbs * cm->mi_stride]->mbmi;
+      if (mbmi->sb_type == h) {
+        return mbmi_below->sb_type == h ? PARTITION_HORZ : PARTITION_HORZ_B;
+      } else if (mbmi->sb_type == v) {
+        return mbmi_right->sb_type == v ? PARTITION_VERT : PARTITION_VERT_B;
+      } else if (mbmi_below->sb_type == h) {
+        return PARTITION_HORZ_A;
+      } else if (mbmi_right->sb_type == v) {
+        return PARTITION_VERT_A;
+      } else {
+        return PARTITION_SPLIT;
+      }
+    }
+
+    return partition;
+#endif  // !CONFIG_EXT_PARTITION_TYPES
+  }
+}
+
+static INLINE void set_sb_size(VP10_COMMON *const cm,
+                               const BLOCK_SIZE sb_size) {
+  cm->sb_size = sb_size;
+  cm->mib_size = num_8x8_blocks_wide_lookup[cm->sb_size];
+  cm->mib_size_log2 = mi_width_log2_lookup[cm->sb_size];
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_ONYXC_INT_H_
diff --git a/av1/common/pred_common.c b/av1/common/pred_common.c
new file mode 100644
index 0000000..0e1045e
--- /dev/null
+++ b/av1/common/pred_common.c
@@ -0,0 +1,1384 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/common.h"
+#include "av1/common/pred_common.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/seg_common.h"
+
+// Returns a context number for the given MB prediction signal
+#if CONFIG_DUAL_FILTER
+static INTERP_FILTER get_ref_filter_type(const MODE_INFO *mi,
+                                         const MACROBLOCKD *xd, int dir,
+                                         MV_REFERENCE_FRAME ref_frame) {
+  INTERP_FILTER ref_type = SWITCHABLE_FILTERS;
+  const MB_MODE_INFO *ref_mbmi = &mi->mbmi;
+  int use_subpel[2] = {
+    has_subpel_mv_component(mi, xd, dir),
+    has_subpel_mv_component(mi, xd, dir + 2),
+  };
+
+  if (ref_mbmi->ref_frame[0] == ref_frame && use_subpel[0])
+    ref_type = ref_mbmi->interp_filter[(dir & 0x01)];
+  else if (ref_mbmi->ref_frame[1] == ref_frame && use_subpel[1])
+    ref_type = ref_mbmi->interp_filter[(dir & 0x01) + 2];
+
+  return ref_type;
+}
+
+int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir) {
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const int ctx_offset =
+      (mbmi->ref_frame[1] > INTRA_FRAME) * INTER_FILTER_COMP_OFFSET;
+  MV_REFERENCE_FRAME ref_frame =
+      (dir < 2) ? mbmi->ref_frame[0] : mbmi->ref_frame[1];
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  int filter_type_ctx = ctx_offset + (dir & 0x01) * INTER_FILTER_DIR_OFFSET;
+  int left_type = SWITCHABLE_FILTERS;
+  int above_type = SWITCHABLE_FILTERS;
+
+  if (xd->left_available)
+    left_type = get_ref_filter_type(xd->mi[-1], xd, dir, ref_frame);
+
+  if (xd->up_available)
+    above_type =
+        get_ref_filter_type(xd->mi[-xd->mi_stride], xd, dir, ref_frame);
+
+  if (left_type == above_type)
+    filter_type_ctx += left_type;
+  else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS)
+    filter_type_ctx += above_type;
+  else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS)
+    filter_type_ctx += left_type;
+  else
+    filter_type_ctx += SWITCHABLE_FILTERS;
+
+  return filter_type_ctx;
+}
+#else
+int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int left_type = xd->left_available && is_inter_block(left_mbmi)
+                            ? left_mbmi->interp_filter
+                            : SWITCHABLE_FILTERS;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const int above_type = xd->up_available && is_inter_block(above_mbmi)
+                             ? above_mbmi->interp_filter
+                             : SWITCHABLE_FILTERS;
+
+  if (left_type == above_type)
+    return left_type;
+  else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS)
+    return above_type;
+  else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS)
+    return left_type;
+  else
+    return SWITCHABLE_FILTERS;
+}
+#endif
+
+#if CONFIG_EXT_INTRA
+// Obtain the reference filter type from the above/left neighbor blocks.
+static INTRA_FILTER get_ref_intra_filter(const MB_MODE_INFO *ref_mbmi) {
+  INTRA_FILTER ref_type = INTRA_FILTERS;
+
+  if (ref_mbmi->sb_type >= BLOCK_8X8) {
+    PREDICTION_MODE mode = ref_mbmi->mode;
+    if (is_inter_block(ref_mbmi)) {
+#if CONFIG_DUAL_FILTER
+      switch (ref_mbmi->interp_filter[0]) {
+#else
+      switch (ref_mbmi->interp_filter) {
+#endif
+        case EIGHTTAP_REGULAR: ref_type = INTRA_FILTER_8TAP; break;
+        case EIGHTTAP_SMOOTH: ref_type = INTRA_FILTER_8TAP_SMOOTH; break;
+        case MULTITAP_SHARP: ref_type = INTRA_FILTER_8TAP_SHARP; break;
+        case BILINEAR: ref_type = INTRA_FILTERS; break;
+        default: break;
+      }
+    } else {
+      if (mode != DC_PRED && mode != TM_PRED) {
+        int p_angle =
+            mode_to_angle_map[mode] + ref_mbmi->angle_delta[0] * ANGLE_STEP;
+        if (vp10_is_intra_filter_switchable(p_angle)) {
+          ref_type = ref_mbmi->intra_filter;
+        }
+      }
+    }
+  }
+  return ref_type;
+}
+
+int vp10_get_pred_context_intra_interp(const MACROBLOCKD *xd) {
+  int left_type = INTRA_FILTERS, above_type = INTRA_FILTERS;
+
+  if (xd->left_available) left_type = get_ref_intra_filter(xd->left_mbmi);
+
+  if (xd->up_available) above_type = get_ref_intra_filter(xd->above_mbmi);
+
+  if (left_type == above_type)
+    return left_type;
+  else if (left_type == INTRA_FILTERS && above_type != INTRA_FILTERS)
+    return above_type;
+  else if (left_type != INTRA_FILTERS && above_type == INTRA_FILTERS)
+    return left_type;
+  else
+    return INTRA_FILTERS;
+}
+#endif  // CONFIG_EXT_INTRA
+
+// The mode info data structure has a one element border above and to the
+// left of the entries corresponding to real macroblocks.
+// The prediction flags in these dummy entries are initialized to 0.
+// 0 - inter/inter, inter/--, --/inter, --/--
+// 1 - intra/inter, inter/intra
+// 2 - intra/--, --/intra
+// 3 - intra/intra
+int vp10_get_intra_inter_context(const MACROBLOCKD *xd) {
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
+
+  if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+    return left_intra && above_intra ? 3 : left_intra || above_intra;
+  } else if (has_above || has_left) {  // one edge available
+    return 2 * !is_inter_block(has_above ? above_mbmi : left_mbmi);
+  } else {
+    return 0;
+  }
+}
+
+#if CONFIG_EXT_REFS
+
+#define CHECK_BWDREF_OR_ALTREF(ref_frame) \
+  (((ref_frame) == BWDREF_FRAME) || ((ref_frame) == ALTREF_FRAME))
+
+int vp10_get_reference_mode_context(const VP10_COMMON *cm,
+                                    const MACROBLOCKD *xd) {
+  int ctx;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
+
+  (void)cm;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  if (has_above && has_left) {  // both edges available
+    if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi))
+      // neither edge uses comp pred (0/1)
+      ctx = CHECK_BWDREF_OR_ALTREF(above_mbmi->ref_frame[0]) ^
+            CHECK_BWDREF_OR_ALTREF(left_mbmi->ref_frame[0]);
+    else if (!has_second_ref(above_mbmi))
+      // one of two edges uses comp pred (2/3)
+      ctx = 2 + (CHECK_BWDREF_OR_ALTREF(above_mbmi->ref_frame[0]) ||
+                 !is_inter_block(above_mbmi));
+    else if (!has_second_ref(left_mbmi))
+      // one of two edges uses comp pred (2/3)
+      ctx = 2 + (CHECK_BWDREF_OR_ALTREF(left_mbmi->ref_frame[0]) ||
+                 !is_inter_block(left_mbmi));
+    else  // both edges use comp pred (4)
+      ctx = 4;
+  } else if (has_above || has_left) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+    if (!has_second_ref(edge_mbmi))
+      // edge does not use comp pred (0/1)
+      ctx = CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]);
+    else
+      // edge uses comp pred (3)
+      ctx = 3;
+  } else {  // no edges available (1)
+    ctx = 1;
+  }
+  assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS);
+  return ctx;
+}
+
+#else  // CONFIG_EXT_REFS
+
+int vp10_get_reference_mode_context(const VP10_COMMON *cm,
+                                    const MACROBLOCKD *xd) {
+  int ctx;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  if (has_above && has_left) {  // both edges available
+    if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi))
+      // neither edge uses comp pred (0/1)
+      ctx = (above_mbmi->ref_frame[0] == cm->comp_fixed_ref) ^
+            (left_mbmi->ref_frame[0] == cm->comp_fixed_ref);
+    else if (!has_second_ref(above_mbmi))
+      // one of two edges uses comp pred (2/3)
+      ctx = 2 + (above_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
+                 !is_inter_block(above_mbmi));
+    else if (!has_second_ref(left_mbmi))
+      // one of two edges uses comp pred (2/3)
+      ctx = 2 + (left_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
+                 !is_inter_block(left_mbmi));
+    else  // both edges use comp pred (4)
+      ctx = 4;
+  } else if (has_above || has_left) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+    if (!has_second_ref(edge_mbmi))
+      // edge does not use comp pred (0/1)
+      ctx = edge_mbmi->ref_frame[0] == cm->comp_fixed_ref;
+    else
+      // edge uses comp pred (3)
+      ctx = 3;
+  } else {  // no edges available (1)
+    ctx = 1;
+  }
+  assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS);
+  return ctx;
+}
+
+#endif  // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+
+// TODO(zoeliu): Future work will be conducted to optimize the context design
+//               for the coding of the reference frames.
+
+#define CHECK_LAST_OR_LAST2(ref_frame) \
+  ((ref_frame == LAST_FRAME) || (ref_frame == LAST2_FRAME))
+
+#define CHECK_GOLDEN_OR_LAST3(ref_frame) \
+  ((ref_frame == GOLDEN_FRAME) || (ref_frame == LAST3_FRAME))
+
+// Returns a context number for the given MB prediction signal.
+// Signals whether the first reference frame for a compound mode is either
+// GOLDEN/LAST3 or LAST/LAST2.
+//
+// NOTE(zoeliu): This models the probability that ref_frame[0] is either
+//               GOLDEN_FRAME or LAST3_FRAME.
+int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm,
+                                     const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int above_in_image = xd->up_available;
+  const int left_in_image = xd->left_available;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+  const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
+
+  if (above_in_image && left_in_image) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra (2)
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+      if (!has_second_ref(edge_mbmi))  // single pred (1/3)
+        pred_context =
+            1 + 2 * (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]));
+      else  // comp pred (1/3)
+        pred_context = 1 +
+                       2 * (!CHECK_GOLDEN_OR_LAST3(
+                               edge_mbmi->ref_frame[fwd_ref_sign_idx]));
+    } else {  // inter/inter
+      const int l_sg = !has_second_ref(left_mbmi);
+      const int a_sg = !has_second_ref(above_mbmi);
+      const MV_REFERENCE_FRAME frfa =
+          a_sg ? above_mbmi->ref_frame[0]
+               : above_mbmi->ref_frame[fwd_ref_sign_idx];
+      const MV_REFERENCE_FRAME frfl =
+          l_sg ? left_mbmi->ref_frame[0]
+               : left_mbmi->ref_frame[fwd_ref_sign_idx];
+
+      if (frfa == frfl && CHECK_GOLDEN_OR_LAST3(frfa)) {
+        pred_context = 0;
+      } else if (l_sg && a_sg) {  // single/single
+        if ((CHECK_BWDREF_OR_ALTREF(frfa) && CHECK_LAST_OR_LAST2(frfl)) ||
+            (CHECK_BWDREF_OR_ALTREF(frfl) && CHECK_LAST_OR_LAST2(frfa))) {
+          pred_context = 4;
+        } else if (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl)) {
+          pred_context = 1;
+        } else {
+          pred_context = 3;
+        }
+      } else if (l_sg || a_sg) {  // single/comp
+        const MV_REFERENCE_FRAME frfc = l_sg ? frfa : frfl;
+        const MV_REFERENCE_FRAME rfs = a_sg ? frfa : frfl;
+
+        if (CHECK_GOLDEN_OR_LAST3(frfc) && !CHECK_GOLDEN_OR_LAST3(rfs))
+          pred_context = 1;
+        else if (CHECK_GOLDEN_OR_LAST3(rfs) && !CHECK_GOLDEN_OR_LAST3(frfc))
+          pred_context = 2;
+        else
+          pred_context = 4;
+      } else {  // comp/comp
+        if ((CHECK_LAST_OR_LAST2(frfa) && CHECK_LAST_OR_LAST2(frfl))) {
+          pred_context = 4;
+        } else {
+          // NOTE(zoeliu): The following assert may be removed once confirmed.
+          assert(CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl));
+          pred_context = 2;
+        }
+      }
+    }
+  } else if (above_in_image || left_in_image) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+    if (!is_inter_block(edge_mbmi)) {
+      pred_context = 2;
+    } else {
+      if (has_second_ref(edge_mbmi))
+        pred_context =
+            4 *
+            (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[fwd_ref_sign_idx]));
+      else
+        pred_context = 3 * (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]));
+    }
+  } else {  // no edges available (2)
+    pred_context = 2;
+  }
+
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+  return pred_context;
+}
+
+// Returns a context number for the given MB prediction signal.
+// Signals whether the first reference frame for a compound mode is LAST,
+// given that it is known to be either LAST or LAST2.
+//
+// NOTE(zoeliu): This models the probability that ref_frame[0] is LAST_FRAME,
+// conditioned on it being either LAST_FRAME or LAST2_FRAME.
+int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm,
+                                      const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int above_in_image = xd->up_available;
+  const int left_in_image = xd->left_available;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+  const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
+
+  if (above_in_image && left_in_image) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra (2)
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+      if (!has_second_ref(edge_mbmi))  // single pred (1/3)
+        pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != LAST_FRAME);
+      else  // comp pred (1/3)
+        pred_context =
+            1 + 2 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != LAST_FRAME);
+    } else {  // inter/inter
+      const int l_sg = !has_second_ref(left_mbmi);
+      const int a_sg = !has_second_ref(above_mbmi);
+      const MV_REFERENCE_FRAME frfa =
+          a_sg ? above_mbmi->ref_frame[0]
+               : above_mbmi->ref_frame[fwd_ref_sign_idx];
+      const MV_REFERENCE_FRAME frfl =
+          l_sg ? left_mbmi->ref_frame[0]
+               : left_mbmi->ref_frame[fwd_ref_sign_idx];
+
+      if (frfa == frfl && frfa == LAST_FRAME)
+        pred_context = 0;
+      else if (l_sg && a_sg) {  // single/single
+        if (frfa == LAST_FRAME || frfl == LAST_FRAME)
+          pred_context = 1;
+        else if (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl))
+          pred_context = 2 + (frfa != frfl);
+        else if (frfa == frfl ||
+                 (CHECK_BWDREF_OR_ALTREF(frfa) && CHECK_BWDREF_OR_ALTREF(frfl)))
+          pred_context = 3;
+        else
+          pred_context = 4;
+      } else if (l_sg || a_sg) {  // single/comp
+        const MV_REFERENCE_FRAME frfc = l_sg ? frfa : frfl;
+        const MV_REFERENCE_FRAME rfs = a_sg ? frfa : frfl;
+
+        if (frfc == LAST_FRAME && rfs != LAST_FRAME)
+          pred_context = 1;
+        else if (rfs == LAST_FRAME && frfc != LAST_FRAME)
+          pred_context = 2;
+        else
+          pred_context =
+              3 + (frfc == LAST2_FRAME || CHECK_GOLDEN_OR_LAST3(rfs));
+      } else {  // comp/comp
+        if (frfa == LAST_FRAME || frfl == LAST_FRAME)
+          pred_context = 2;
+        else
+          pred_context =
+              3 + (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl));
+      }
+    }
+  } else if (above_in_image || left_in_image) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+    if (!is_inter_block(edge_mbmi)) {
+      pred_context = 2;
+    } else {
+      if (has_second_ref(edge_mbmi)) {
+        pred_context =
+            4 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != LAST_FRAME);
+      } else {
+        if (edge_mbmi->ref_frame[0] == LAST_FRAME)
+          pred_context = 0;
+        else
+          pred_context = 2 + CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]);
+      }
+    }
+  } else {  // no edges available (2)
+    pred_context = 2;
+  }
+
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+  return pred_context;
+}
+
+// Returns a context number for the given MB prediction signal.
+// Signals whether the first reference frame for a compound mode is GOLDEN,
+// given that it is known to be either GOLDEN or LAST3.
+//
+// NOTE(zoeliu): This models the probability that ref_frame[0] is GOLDEN_FRAME,
+// conditioned on it being either GOLDEN or LAST3.
+int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm,
+                                      const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int above_in_image = xd->up_available;
+  const int left_in_image = xd->left_available;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+  const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
+
+  if (above_in_image && left_in_image) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra (2)
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+      if (!has_second_ref(edge_mbmi))  // single pred (1/3)
+        pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != GOLDEN_FRAME);
+      else  // comp pred (1/3)
+        pred_context =
+            1 + 2 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != GOLDEN_FRAME);
+    } else {  // inter/inter
+      const int l_sg = !has_second_ref(left_mbmi);
+      const int a_sg = !has_second_ref(above_mbmi);
+      const MV_REFERENCE_FRAME frfa =
+          a_sg ? above_mbmi->ref_frame[0]
+               : above_mbmi->ref_frame[fwd_ref_sign_idx];
+      const MV_REFERENCE_FRAME frfl =
+          l_sg ? left_mbmi->ref_frame[0]
+               : left_mbmi->ref_frame[fwd_ref_sign_idx];
+
+      if (frfa == frfl && frfa == GOLDEN_FRAME)
+        pred_context = 0;
+      else if (l_sg && a_sg) {  // single/single
+        if (frfa == GOLDEN_FRAME || frfl == GOLDEN_FRAME)
+          pred_context = 1;
+        else if (CHECK_LAST_OR_LAST2(frfa) || CHECK_LAST_OR_LAST2(frfl))
+          pred_context = 2 + (frfa != frfl);
+        else if (frfa == frfl ||
+                 (CHECK_BWDREF_OR_ALTREF(frfa) && CHECK_BWDREF_OR_ALTREF(frfl)))
+          pred_context = 3;
+        else
+          pred_context = 4;
+      } else if (l_sg || a_sg) {  // single/comp
+        const MV_REFERENCE_FRAME frfc = l_sg ? frfa : frfl;
+        const MV_REFERENCE_FRAME rfs = a_sg ? frfa : frfl;
+
+        if (frfc == GOLDEN_FRAME && rfs != GOLDEN_FRAME)
+          pred_context = 1;
+        else if (rfs == GOLDEN_FRAME && frfc != GOLDEN_FRAME)
+          pred_context = 2;
+        else
+          pred_context = 3 + (frfc == LAST3_FRAME || CHECK_LAST_OR_LAST2(rfs));
+      } else {  // comp/comp
+        if (frfa == GOLDEN_FRAME || frfl == GOLDEN_FRAME)
+          pred_context = 2;
+        else
+          pred_context =
+              3 + (CHECK_LAST_OR_LAST2(frfa) || CHECK_LAST_OR_LAST2(frfl));
+      }
+    }
+  } else if (above_in_image || left_in_image) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+    if (!is_inter_block(edge_mbmi)) {
+      pred_context = 2;
+    } else {
+      if (has_second_ref(edge_mbmi)) {
+        pred_context =
+            4 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != GOLDEN_FRAME);
+      } else {
+        if (edge_mbmi->ref_frame[0] == GOLDEN_FRAME)
+          pred_context = 0;
+        else
+          pred_context = 2 + CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]);
+      }
+    }
+  } else {  // no edges available (2)
+    pred_context = 2;
+  }
+
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+  return pred_context;
+}
+
+// Returns a context number for the given MB prediction signal
+int vp10_get_pred_context_comp_bwdref_p(const VP10_COMMON *cm,
+                                        const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int above_in_image = xd->up_available;
+  const int left_in_image = xd->left_available;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+  const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
+
+  if (above_in_image && left_in_image) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra (2)
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+      if (!has_second_ref(edge_mbmi))  // single pred (1/3)
+        pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_bwd_ref[1]);
+      else  // comp pred (1/3)
+        pred_context =
+            1 +
+            2 * (edge_mbmi->ref_frame[bwd_ref_sign_idx] != cm->comp_bwd_ref[1]);
+    } else {  // inter/inter
+      const int l_comp = has_second_ref(left_mbmi);
+      const int a_comp = has_second_ref(above_mbmi);
+
+      const MV_REFERENCE_FRAME l_brf =
+          l_comp ? left_mbmi->ref_frame[bwd_ref_sign_idx] : NONE;
+      const MV_REFERENCE_FRAME a_brf =
+          a_comp ? above_mbmi->ref_frame[bwd_ref_sign_idx] : NONE;
+
+      const MV_REFERENCE_FRAME l_frf =
+          !l_comp ? left_mbmi->ref_frame[0]
+                  : left_mbmi->ref_frame[fwd_ref_sign_idx];
+      const MV_REFERENCE_FRAME a_frf =
+          !a_comp ? above_mbmi->ref_frame[0]
+                  : above_mbmi->ref_frame[fwd_ref_sign_idx];
+
+      if (l_comp && a_comp) {  // comp/comp
+        if (l_brf == a_brf && l_brf == cm->comp_bwd_ref[1]) {
+          pred_context = 0;
+        } else if (l_brf == cm->comp_bwd_ref[1] ||
+                   a_brf == cm->comp_bwd_ref[1]) {
+          pred_context = 1;
+        } else {
+          // NOTE: Backward ref should be either BWDREF or ALTREF.
+          assert(l_brf == a_brf && l_brf != cm->comp_bwd_ref[1]);
+          pred_context = 3;
+        }
+      } else if (!l_comp && !a_comp) {  // single/single
+        if (l_frf == a_frf && l_frf == cm->comp_bwd_ref[1]) {
+          pred_context = 0;
+        } else if (l_frf == cm->comp_bwd_ref[1] ||
+                   a_frf == cm->comp_bwd_ref[1]) {
+          pred_context = 1;
+        } else if (l_frf == a_frf) {
+          pred_context = 3;
+        } else {
+          assert(l_frf != a_frf && l_frf != cm->comp_bwd_ref[1] &&
+                 a_frf != cm->comp_bwd_ref[1]);
+          pred_context = 4;
+        }
+      } else {  // comp/single
+        assert((l_comp && !a_comp) || (!l_comp && a_comp));
+
+        if ((l_comp && l_brf == cm->comp_bwd_ref[1] &&
+             a_frf == cm->comp_bwd_ref[1]) ||
+            (a_comp && a_brf == cm->comp_bwd_ref[1] &&
+             l_frf == cm->comp_bwd_ref[1])) {
+          pred_context = 1;
+        } else if ((l_comp && l_brf == cm->comp_bwd_ref[1]) ||
+                   (a_comp && a_brf == cm->comp_bwd_ref[1]) ||
+                   (!l_comp && l_frf == cm->comp_bwd_ref[1]) ||
+                   (!a_comp && a_frf == cm->comp_bwd_ref[1])) {
+          pred_context = 2;
+        } else {
+          pred_context = 4;
+        }
+      }
+    }
+  } else if (above_in_image || left_in_image) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+    if (!is_inter_block(edge_mbmi)) {
+      pred_context = 2;
+    } else {
+      if (has_second_ref(edge_mbmi)) {
+        pred_context =
+            4 * (edge_mbmi->ref_frame[bwd_ref_sign_idx] != cm->comp_bwd_ref[1]);
+      } else {
+        pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_bwd_ref[1]);
+      }
+    }
+  } else {  // no edges available (2)
+    pred_context = 2;
+  }
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+  return pred_context;
+}
+
+#else  // CONFIG_EXT_REFS
+
+// Returns a context number for the given MB prediction signal
+int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm,
+                                     const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int above_in_image = xd->up_available;
+  const int left_in_image = xd->left_available;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+  const int var_ref_idx = !fix_ref_idx;
+
+  if (above_in_image && left_in_image) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra (2)
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+      if (!has_second_ref(edge_mbmi))  // single pred (1/3)
+        pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
+      else  // comp pred (1/3)
+        pred_context =
+            1 + 2 * (edge_mbmi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]);
+    } else {  // inter/inter
+      const int l_sg = !has_second_ref(left_mbmi);
+      const int a_sg = !has_second_ref(above_mbmi);
+      const MV_REFERENCE_FRAME vrfa =
+          a_sg ? above_mbmi->ref_frame[0] : above_mbmi->ref_frame[var_ref_idx];
+      const MV_REFERENCE_FRAME vrfl =
+          l_sg ? left_mbmi->ref_frame[0] : left_mbmi->ref_frame[var_ref_idx];
+
+      if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) {
+        pred_context = 0;
+      } else if (l_sg && a_sg) {  // single/single
+        if ((vrfa == cm->comp_fixed_ref && vrfl == cm->comp_var_ref[0]) ||
+            (vrfl == cm->comp_fixed_ref && vrfa == cm->comp_var_ref[0]))
+          pred_context = 4;
+        else if (vrfa == vrfl)
+          pred_context = 3;
+        else
+          pred_context = 1;
+      } else if (l_sg || a_sg) {  // single/comp
+        const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl;
+        const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl;
+        if (vrfc == cm->comp_var_ref[1] && rfs != cm->comp_var_ref[1])
+          pred_context = 1;
+        else if (rfs == cm->comp_var_ref[1] && vrfc != cm->comp_var_ref[1])
+          pred_context = 2;
+        else
+          pred_context = 4;
+      } else if (vrfa == vrfl) {  // comp/comp
+        pred_context = 4;
+      } else {
+        pred_context = 2;
+      }
+    }
+  } else if (above_in_image || left_in_image) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+    if (!is_inter_block(edge_mbmi)) {
+      pred_context = 2;
+    } else {
+      if (has_second_ref(edge_mbmi))
+        pred_context =
+            4 * (edge_mbmi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]);
+      else
+        pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
+    }
+  } else {  // no edges available (2)
+    pred_context = 2;
+  }
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+  return pred_context;
+}
+
+#endif  // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+
+// For the bit signaling whether the single reference is an ALTREF_FRAME
+// or a BWDREF_FRAME.
+//
+// NOTE(zoeliu): This models the probability that ref_frame[0] is
+//               ALTREF/BWDREF.
+int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter or inter/intra
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+      if (!has_second_ref(edge_mbmi))
+        pred_context = 4 * (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]));
+      else
+        pred_context = 1 + (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]) ||
+                            !CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[1]));
+    } else {  // inter/inter
+      const int above_has_second = has_second_ref(above_mbmi);
+      const int left_has_second = has_second_ref(left_mbmi);
+
+      const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+      const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+      if (above_has_second && left_has_second) {
+        pred_context = 1 + (!CHECK_BWDREF_OR_ALTREF(above0) ||
+                            !CHECK_BWDREF_OR_ALTREF(above1) ||
+                            !CHECK_BWDREF_OR_ALTREF(left0) ||
+                            !CHECK_BWDREF_OR_ALTREF(left1));
+      } else if (above_has_second || left_has_second) {
+        const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+        if (!CHECK_BWDREF_OR_ALTREF(rfs))
+          pred_context = 3 + (!CHECK_BWDREF_OR_ALTREF(crf1) ||
+                              !CHECK_BWDREF_OR_ALTREF(crf2));
+        else
+          pred_context =
+              !CHECK_BWDREF_OR_ALTREF(crf1) || !CHECK_BWDREF_OR_ALTREF(crf2);
+      } else {
+        pred_context = 2 * (!CHECK_BWDREF_OR_ALTREF(above0)) +
+                       2 * (!CHECK_BWDREF_OR_ALTREF(left0));
+      }
+    }
+  } else if (has_above || has_left) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+    if (!is_inter_block(edge_mbmi)) {  // intra
+      pred_context = 2;
+    } else {  // inter
+      if (!has_second_ref(edge_mbmi))
+        pred_context = 4 * (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]));
+      else
+        pred_context = 1 + (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]) ||
+                            !CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[1]));
+    }
+  } else {  // no edges available
+    pred_context = 2;
+  }
+
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+  return pred_context;
+}
+
+// For the bit to signal whether the single reference is ALTREF_FRAME or
+// BWDREF_FRAME, knowing that it shall be either of these 2 choices.
+//
+// NOTE(zoeliu): The probability that ref_frame[0] is ALTREF_FRAME, conditioned
+// on it being either ALTREF_FRAME or BWDREF_FRAME.
+int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter or inter/intra
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+      if (!has_second_ref(edge_mbmi)) {
+        if (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]))
+          pred_context = 3;
+        else
+          pred_context = 4 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME);
+      } else {
+        pred_context = 1 +
+                       2 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME ||
+                            edge_mbmi->ref_frame[1] == BWDREF_FRAME);
+      }
+    } else {  // inter/inter
+      const int above_has_second = has_second_ref(above_mbmi);
+      const int left_has_second = has_second_ref(left_mbmi);
+      const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+      const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+      if (above_has_second && left_has_second) {
+        if (above0 == left0 && above1 == left1)
+          pred_context =
+              3 * (above0 == BWDREF_FRAME || above1 == BWDREF_FRAME ||
+                   left0 == BWDREF_FRAME || left1 == BWDREF_FRAME);
+        else
+          pred_context = 2;
+      } else if (above_has_second || left_has_second) {
+        const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+        if (rfs == BWDREF_FRAME)
+          pred_context = 3 + (crf1 == BWDREF_FRAME || crf2 == BWDREF_FRAME);
+        else if (rfs == ALTREF_FRAME)
+          pred_context = (crf1 == BWDREF_FRAME || crf2 == BWDREF_FRAME);
+        else
+          pred_context = 1 + 2 * (crf1 == BWDREF_FRAME || crf2 == BWDREF_FRAME);
+      } else {
+        if (!CHECK_BWDREF_OR_ALTREF(above0) && !CHECK_BWDREF_OR_ALTREF(left0)) {
+          pred_context = 2 + (above0 == left0);
+        } else if (!CHECK_BWDREF_OR_ALTREF(above0) ||
+                   !CHECK_BWDREF_OR_ALTREF(left0)) {
+          const MV_REFERENCE_FRAME edge0 =
+              !CHECK_BWDREF_OR_ALTREF(above0) ? left0 : above0;
+          pred_context = 4 * (edge0 == BWDREF_FRAME);
+        } else {
+          pred_context =
+              2 * (above0 == BWDREF_FRAME) + 2 * (left0 == BWDREF_FRAME);
+        }
+      }
+    }
+  } else if (has_above || has_left) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+    if (!is_inter_block(edge_mbmi) ||
+        (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]) &&
+         !has_second_ref(edge_mbmi)))
+      pred_context = 2;
+    else if (!has_second_ref(edge_mbmi))
+      pred_context = 4 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME);
+    else
+      pred_context = 3 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME ||
+                          edge_mbmi->ref_frame[1] == BWDREF_FRAME);
+  } else {  // no edges available (2)
+    pred_context = 2;
+  }
+
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+  return pred_context;
+}
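+
+// For illustration of the context mapping in
+// vp10_get_pred_context_single_ref_p2(): when both neighbors are
+// single-reference inter blocks using a backward reference, two BWDREF_FRAME
+// neighbors give 2 * 1 + 2 * 1 == 4, two ALTREF_FRAME neighbors give 0, and
+// one of each gives 2.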
+
+// For the bit to signal whether the single reference is LAST3/GOLDEN or
+// LAST2/LAST, knowing that it shall be either of these 2 choices.
+//
+// NOTE(zoeliu): The probability that ref_frame[0] is LAST3/GOLDEN, conditioned
+// on it being one of LAST3/GOLDEN/LAST2/LAST.
+int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter or inter/intra
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+      if (!has_second_ref(edge_mbmi)) {
+        if (CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]))
+          pred_context = 3;
+        else
+          pred_context = 4 * CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]);
+      } else {
+        pred_context = 1 +
+                       2 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) ||
+                            CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1]));
+      }
+    } else {  // inter/inter
+      const int above_has_second = has_second_ref(above_mbmi);
+      const int left_has_second = has_second_ref(left_mbmi);
+      const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+      const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+      if (above_has_second && left_has_second) {
+        if (above0 == left0 && above1 == left1)
+          pred_context =
+              3 * (CHECK_LAST_OR_LAST2(above0) || CHECK_LAST_OR_LAST2(above1) ||
+                   CHECK_LAST_OR_LAST2(left0) || CHECK_LAST_OR_LAST2(left1));
+        else
+          pred_context = 2;
+      } else if (above_has_second || left_has_second) {
+        const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+        if (CHECK_LAST_OR_LAST2(rfs))
+          pred_context =
+              3 + (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2));
+        else if (CHECK_GOLDEN_OR_LAST3(rfs))
+          pred_context =
+              (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2));
+        else
+          pred_context =
+              1 + 2 * (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2));
+      } else {
+        if (CHECK_BWDREF_OR_ALTREF(above0) && CHECK_BWDREF_OR_ALTREF(left0)) {
+          pred_context = 2 + (above0 == left0);
+        } else if (CHECK_BWDREF_OR_ALTREF(above0) ||
+                   CHECK_BWDREF_OR_ALTREF(left0)) {
+          const MV_REFERENCE_FRAME edge0 =
+              CHECK_BWDREF_OR_ALTREF(above0) ? left0 : above0;
+          pred_context = 4 * CHECK_LAST_OR_LAST2(edge0);
+        } else {
+          pred_context =
+              2 * CHECK_LAST_OR_LAST2(above0) + 2 * CHECK_LAST_OR_LAST2(left0);
+        }
+      }
+    }
+  } else if (has_above || has_left) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+    if (!is_inter_block(edge_mbmi) ||
+        (CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]) &&
+         !has_second_ref(edge_mbmi)))
+      pred_context = 2;
+    else if (!has_second_ref(edge_mbmi))
+      pred_context = 4 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]));
+    else
+      pred_context = 3 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) ||
+                          CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1]));
+  } else {  // no edges available (2)
+    pred_context = 2;
+  }
+
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+  return pred_context;
+}
+
+// For the bit to signal whether the single reference is LAST2_FRAME or
+// LAST_FRAME, knowing that it shall be either of these 2 choices.
+//
+// NOTE(zoeliu): The probability that ref_frame[0] is LAST2_FRAME, conditioned
+// on it being either LAST2_FRAME or LAST_FRAME.
+int vp10_get_pred_context_single_ref_p4(const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter or inter/intra
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+      if (!has_second_ref(edge_mbmi)) {
+        if (!CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]))
+          pred_context = 3;
+        else
+          pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+      } else {
+        pred_context = 1 +
+                       2 * (edge_mbmi->ref_frame[0] == LAST_FRAME ||
+                            edge_mbmi->ref_frame[1] == LAST_FRAME);
+      }
+    } else {  // inter/inter
+      const int above_has_second = has_second_ref(above_mbmi);
+      const int left_has_second = has_second_ref(left_mbmi);
+      const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+      const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+      if (above_has_second && left_has_second) {
+        if (above0 == left0 && above1 == left1)
+          pred_context = 3 * (above0 == LAST_FRAME || above1 == LAST_FRAME ||
+                              left0 == LAST_FRAME || left1 == LAST_FRAME);
+        else
+          pred_context = 2;
+      } else if (above_has_second || left_has_second) {
+        const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+        if (rfs == LAST_FRAME)
+          pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+        else if (rfs == LAST2_FRAME)
+          pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+        else
+          pred_context = 1 + 2 * (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+      } else {
+        if (!CHECK_LAST_OR_LAST2(above0) && !CHECK_LAST_OR_LAST2(left0)) {
+          pred_context = 2 + (above0 == left0);
+        } else if (!CHECK_LAST_OR_LAST2(above0) ||
+                   !CHECK_LAST_OR_LAST2(left0)) {
+          const MV_REFERENCE_FRAME edge0 =
+              !CHECK_LAST_OR_LAST2(above0) ? left0 : above0;
+          pred_context = 4 * (edge0 == LAST_FRAME);
+        } else {
+          pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME);
+        }
+      }
+    }
+  } else if (has_above || has_left) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+    if (!is_inter_block(edge_mbmi) ||
+        (!CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) &&
+         !has_second_ref(edge_mbmi)))
+      pred_context = 2;
+    else if (!has_second_ref(edge_mbmi))
+      pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+    else
+      pred_context = 3 * (edge_mbmi->ref_frame[0] == LAST_FRAME ||
+                          edge_mbmi->ref_frame[1] == LAST_FRAME);
+  } else {  // no edges available (2)
+    pred_context = 2;
+  }
+
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+  return pred_context;
+}
+
+// For the bit to signal whether the single reference is GOLDEN_FRAME or
+// LAST3_FRAME, knowing that it shall be either of these 2 choices.
+//
+// NOTE(zoeliu): The probability that ref_frame[0] is GOLDEN_FRAME, conditioned
+// on it being either GOLDEN_FRAME or LAST3_FRAME.
+int vp10_get_pred_context_single_ref_p5(const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter or inter/intra
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+      if (!has_second_ref(edge_mbmi)) {
+        if (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]))
+          pred_context = 3;
+        else
+          pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST3_FRAME);
+      } else {
+        pred_context = 1 +
+                       2 * (edge_mbmi->ref_frame[0] == LAST3_FRAME ||
+                            edge_mbmi->ref_frame[1] == LAST3_FRAME);
+      }
+    } else {  // inter/inter
+      const int above_has_second = has_second_ref(above_mbmi);
+      const int left_has_second = has_second_ref(left_mbmi);
+      const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+      const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+      if (above_has_second && left_has_second) {
+        if (above0 == left0 && above1 == left1)
+          pred_context = 3 * (above0 == LAST3_FRAME || above1 == LAST3_FRAME ||
+                              left0 == LAST3_FRAME || left1 == LAST3_FRAME);
+        else
+          pred_context = 2;
+      } else if (above_has_second || left_has_second) {
+        const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+        if (rfs == LAST3_FRAME)
+          pred_context = 3 + (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
+        else if (rfs == GOLDEN_FRAME)
+          pred_context = (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
+        else
+          pred_context = 1 + 2 * (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
+      } else {
+        if (!CHECK_GOLDEN_OR_LAST3(above0) && !CHECK_GOLDEN_OR_LAST3(left0)) {
+          pred_context = 2 + (above0 == left0);
+        } else if (!CHECK_GOLDEN_OR_LAST3(above0) ||
+                   !CHECK_GOLDEN_OR_LAST3(left0)) {
+          const MV_REFERENCE_FRAME edge0 =
+              !CHECK_GOLDEN_OR_LAST3(above0) ? left0 : above0;
+          pred_context = 4 * (edge0 == LAST3_FRAME);
+        } else {
+          pred_context =
+              2 * (above0 == LAST3_FRAME) + 2 * (left0 == LAST3_FRAME);
+        }
+      }
+    }
+  } else if (has_above || has_left) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+    if (!is_inter_block(edge_mbmi) ||
+        (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]) &&
+         !has_second_ref(edge_mbmi)))
+      pred_context = 2;
+    else if (!has_second_ref(edge_mbmi))
+      pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST3_FRAME);
+    else
+      pred_context = 3 * (edge_mbmi->ref_frame[0] == LAST3_FRAME ||
+                          edge_mbmi->ref_frame[1] == LAST3_FRAME);
+  } else {  // no edges available (2)
+    pred_context = 2;
+  }
+
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+  return pred_context;
+}
+
+#else  // CONFIG_EXT_REFS
+
+int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter or inter/intra
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+      if (!has_second_ref(edge_mbmi))
+        pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+      else
+        pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
+                            edge_mbmi->ref_frame[1] == LAST_FRAME);
+    } else {  // inter/inter
+      const int above_has_second = has_second_ref(above_mbmi);
+      const int left_has_second = has_second_ref(left_mbmi);
+      const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+      const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+      if (above_has_second && left_has_second) {
+        pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME ||
+                            left0 == LAST_FRAME || left1 == LAST_FRAME);
+      } else if (above_has_second || left_has_second) {
+        const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+        if (rfs == LAST_FRAME)
+          pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+        else
+          pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+      } else {
+        pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME);
+      }
+    }
+  } else if (has_above || has_left) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+    if (!is_inter_block(edge_mbmi)) {  // intra
+      pred_context = 2;
+    } else {  // inter
+      if (!has_second_ref(edge_mbmi))
+        pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+      else
+        pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
+                            edge_mbmi->ref_frame[1] == LAST_FRAME);
+    }
+  } else {  // no edges available
+    pred_context = 2;
+  }
+
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+  return pred_context;
+}
+
+int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
+  int pred_context;
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
+
+  // Note:
+  // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mbmi);
+    const int left_intra = !is_inter_block(left_mbmi);
+
+    if (above_intra && left_intra) {  // intra/intra
+      pred_context = 2;
+    } else if (above_intra || left_intra) {  // intra/inter or inter/intra
+      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+      if (!has_second_ref(edge_mbmi)) {
+        if (edge_mbmi->ref_frame[0] == LAST_FRAME)
+          pred_context = 3;
+        else
+          pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+      } else {
+        pred_context = 1 +
+                       2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
+                            edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
+      }
+    } else {  // inter/inter
+      const int above_has_second = has_second_ref(above_mbmi);
+      const int left_has_second = has_second_ref(left_mbmi);
+      const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+      const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+      const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+      if (above_has_second && left_has_second) {
+        if (above0 == left0 && above1 == left1)
+          pred_context =
+              3 * (above0 == GOLDEN_FRAME || above1 == GOLDEN_FRAME ||
+                   left0 == GOLDEN_FRAME || left1 == GOLDEN_FRAME);
+        else
+          pred_context = 2;
+      } else if (above_has_second || left_has_second) {
+        const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+        const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+
+        if (rfs == GOLDEN_FRAME)
+          pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
+        else if (rfs != GOLDEN_FRAME && rfs != LAST_FRAME)
+          pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME;
+        else
+          pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
+      } else {
+        if (above0 == LAST_FRAME && left0 == LAST_FRAME) {
+          pred_context = 3;
+        } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) {
+          const MV_REFERENCE_FRAME edge0 =
+              (above0 == LAST_FRAME) ? left0 : above0;
+          pred_context = 4 * (edge0 == GOLDEN_FRAME);
+        } else {
+          pred_context =
+              2 * (above0 == GOLDEN_FRAME) + 2 * (left0 == GOLDEN_FRAME);
+        }
+      }
+    }
+  } else if (has_above || has_left) {  // one edge available
+    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+    if (!is_inter_block(edge_mbmi) ||
+        (edge_mbmi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mbmi)))
+      pred_context = 2;
+    else if (!has_second_ref(edge_mbmi))
+      pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+    else
+      pred_context = 3 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
+                          edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
+  } else {  // no edges available (2)
+    pred_context = 2;
+  }
+  assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+  return pred_context;
+}
+
+#endif  // CONFIG_EXT_REFS
diff --git a/av1/common/pred_common.h b/av1/common/pred_common.h
new file mode 100644
index 0000000..9a3e3f1
--- /dev/null
+++ b/av1/common/pred_common.h
@@ -0,0 +1,257 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_PRED_COMMON_H_
+#define VP10_COMMON_PRED_COMMON_H_
+
+#include "av1/common/blockd.h"
+#include "av1/common/onyxc_int.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static INLINE int get_segment_id(const VP10_COMMON *cm,
+                                 const uint8_t *segment_ids, BLOCK_SIZE bsize,
+                                 int mi_row, int mi_col) {
+  const int mi_offset = mi_row * cm->mi_cols + mi_col;
+  const int bw = num_8x8_blocks_wide_lookup[bsize];
+  const int bh = num_8x8_blocks_high_lookup[bsize];
+  const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+  const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+  int x, y, segment_id = MAX_SEGMENTS;
+
+  for (y = 0; y < ymis; ++y)
+    for (x = 0; x < xmis; ++x)
+      segment_id =
+          VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]);
+
+  assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
+  return segment_id;
+}
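+
+// For illustration of get_segment_id() above: a BLOCK_16X16 spans a 2x2 group
+// of 8x8 mi units, so the four stored ids are scanned (clipped at the frame
+// edge via xmis/ymis) and the smallest one is returned for the block.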
+
+static INLINE int vp10_get_pred_context_seg_id(const MACROBLOCKD *xd) {
+  const MODE_INFO *const above_mi = xd->above_mi;
+  const MODE_INFO *const left_mi = xd->left_mi;
+  const int above_sip =
+      (above_mi != NULL) ? above_mi->mbmi.seg_id_predicted : 0;
+  const int left_sip = (left_mi != NULL) ? left_mi->mbmi.seg_id_predicted : 0;
+
+  return above_sip + left_sip;
+}
+
+static INLINE vpx_prob vp10_get_pred_prob_seg_id(
+    const struct segmentation_probs *segp, const MACROBLOCKD *xd) {
+  return segp->pred_probs[vp10_get_pred_context_seg_id(xd)];
+}
+
+static INLINE int vp10_get_skip_context(const MACROBLOCKD *xd) {
+  const MODE_INFO *const above_mi = xd->above_mi;
+  const MODE_INFO *const left_mi = xd->left_mi;
+  const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip : 0;
+  const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip : 0;
+  return above_skip + left_skip;
+}
+
+static INLINE vpx_prob vp10_get_skip_prob(const VP10_COMMON *cm,
+                                          const MACROBLOCKD *xd) {
+  return cm->fc->skip_probs[vp10_get_skip_context(xd)];
+}
+
+#if CONFIG_DUAL_FILTER
+int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir);
+#else
+int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
+#endif
+
+#if CONFIG_EXT_INTRA
+int vp10_get_pred_context_intra_interp(const MACROBLOCKD *xd);
+#endif  // CONFIG_EXT_INTRA
+
+int vp10_get_intra_inter_context(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_intra_inter_prob(const VP10_COMMON *cm,
+                                                 const MACROBLOCKD *xd) {
+  return cm->fc->intra_inter_prob[vp10_get_intra_inter_context(xd)];
+}
+
+int vp10_get_reference_mode_context(const VP10_COMMON *cm,
+                                    const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_reference_mode_prob(const VP10_COMMON *cm,
+                                                    const MACROBLOCKD *xd) {
+  return cm->fc->comp_inter_prob[vp10_get_reference_mode_context(cm, xd)];
+}
+
+int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm,
+                                     const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p(const VP10_COMMON *cm,
+                                                     const MACROBLOCKD *xd) {
+  const int pred_context = vp10_get_pred_context_comp_ref_p(cm, xd);
+  return cm->fc->comp_ref_prob[pred_context][0];
+}
+
+#if CONFIG_EXT_REFS
+int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm,
+                                      const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p1(const VP10_COMMON *cm,
+                                                      const MACROBLOCKD *xd) {
+  const int pred_context = vp10_get_pred_context_comp_ref_p1(cm, xd);
+  return cm->fc->comp_ref_prob[pred_context][1];
+}
+
+int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm,
+                                      const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p2(const VP10_COMMON *cm,
+                                                      const MACROBLOCKD *xd) {
+  const int pred_context = vp10_get_pred_context_comp_ref_p2(cm, xd);
+  return cm->fc->comp_ref_prob[pred_context][2];
+}
+
+int vp10_get_pred_context_comp_bwdref_p(const VP10_COMMON *cm,
+                                        const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_bwdref_p(const VP10_COMMON *cm,
+                                                        const MACROBLOCKD *xd) {
+  const int pred_context = vp10_get_pred_context_comp_bwdref_p(cm, xd);
+  return cm->fc->comp_bwdref_prob[pred_context][0];
+}
+
+#endif  // CONFIG_EXT_REFS
+
+int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p1(const VP10_COMMON *cm,
+                                                        const MACROBLOCKD *xd) {
+  return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p1(xd)][0];
+}
+
+int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p2(const VP10_COMMON *cm,
+                                                        const MACROBLOCKD *xd) {
+  return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p2(xd)][1];
+}
+
+#if CONFIG_EXT_REFS
+int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p3(const VP10_COMMON *cm,
+                                                        const MACROBLOCKD *xd) {
+  return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p3(xd)][2];
+}
+
+int vp10_get_pred_context_single_ref_p4(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p4(const VP10_COMMON *cm,
+                                                        const MACROBLOCKD *xd) {
+  return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p4(xd)][3];
+}
+
+int vp10_get_pred_context_single_ref_p5(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p5(const VP10_COMMON *cm,
+                                                        const MACROBLOCKD *xd) {
+  return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p5(xd)][4];
+}
+#endif  // CONFIG_EXT_REFS
+
+// Returns a context number for the given MB prediction signal
+// The mode info data structure has a one element border above and to the
+// left of the entries corresponding to real blocks.
+// The prediction flags in these dummy entries are initialized to 0.
+static INLINE int get_tx_size_context(const MACROBLOCKD *xd) {
+  const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type];
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
+  int above_ctx = (has_above && !above_mbmi->skip)
+                      ? (int)txsize_sqr_map[above_mbmi->tx_size]
+                      : max_tx_size;
+  int left_ctx = (has_left && !left_mbmi->skip)
+                     ? (int)txsize_sqr_map[left_mbmi->tx_size]
+                     : max_tx_size;
+  assert(xd->mi[0]->mbmi.sb_type >= BLOCK_8X8);
+  if (!has_left) left_ctx = above_ctx;
+
+  if (!has_above) above_ctx = left_ctx;
+
+  return (above_ctx + left_ctx) > max_tx_size;
+}
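+
+// For illustration of get_tx_size_context() (using TX_4X4 == 0 through
+// TX_32X32 == 3): for a block whose max transform is TX_32X32, TX_16X16
+// neighbors above and left give 2 + 2 > 3, i.e. context 1, while TX_8X8 above
+// with TX_16X16 left gives 1 + 2 == 3, i.e. context 0.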
+
+#if CONFIG_VAR_TX
+static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd,
+                             MB_MODE_INFO *mbmi, BLOCK_SIZE plane_bsize,
+                             TX_SIZE tx_size, int blk_row, int blk_col,
+                             TX_SIZE max_tx_size, int ctx) {
+  const struct macroblockd_plane *const pd = &xd->plane[0];
+  const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+  const int tx_row = blk_row >> (1 - pd->subsampling_y);
+  const int tx_col = blk_col >> (1 - pd->subsampling_x);
+  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
+  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+  if (xd->mb_to_bottom_edge < 0)
+    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+  if (xd->mb_to_right_edge < 0)
+    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+
+  if (tx_size == plane_tx_size) {
+    ++xd->counts->tx_size[max_tx_size - TX_8X8][ctx][tx_size];
+    mbmi->tx_size = tx_size;
+  } else {
+    int bsl = b_width_log2_lookup[bsize];
+    int i;
+
+    assert(bsl > 0);
+    --bsl;
+
+    for (i = 0; i < 4; ++i) {
+      const int offsetr = blk_row + ((i >> 1) << bsl);
+      const int offsetc = blk_col + ((i & 0x01) << bsl);
+
+      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
+      update_tx_counts(cm, xd, mbmi, plane_bsize, tx_size - 1, offsetr, offsetc,
+                       max_tx_size, ctx);
+    }
+  }
+}
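+
+// Note on the recursion in update_tx_counts(): when the transform actually
+// coded at this position (plane_tx_size) is smaller than the tx_size being
+// visited, the walk descends into four half-width quadrants at tx_size - 1,
+// so exactly one counter increment lands on each coded transform unit.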
+
+static INLINE void inter_block_tx_count_update(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                               MB_MODE_INFO *mbmi,
+                                               BLOCK_SIZE plane_bsize,
+                                               int ctx) {
+  const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+  const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+  TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+  BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+  int bh = num_4x4_blocks_wide_lookup[txb_size];
+  int idx, idy;
+
+  for (idy = 0; idy < mi_height; idy += bh)
+    for (idx = 0; idx < mi_width; idx += bh)
+      update_tx_counts(cm, xd, mbmi, plane_bsize, max_tx_size, idy, idx,
+                       max_tx_size, ctx);
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_PRED_COMMON_H_
diff --git a/av1/common/quant_common.c b/av1/common/quant_common.c
new file mode 100644
index 0000000..79d8fb8
--- /dev/null
+++ b/av1/common/quant_common.c
@@ -0,0 +1,11325 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/common.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/entropy.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/seg_common.h"
+#include "av1/common/blockd.h"
+
+#if CONFIG_AOM_QM
+static void make_qmatrices(qm_val_t *wmatrix[NUM_QM_LEVELS][2][2][TX_SIZES],
+                           qm_val_t *iwmatrix[NUM_QM_LEVELS][2][2][TX_SIZES]);
+#endif
+#if CONFIG_NEW_QUANT
+
+// Bin widths expressed as a fraction over 128 of the quant stepsize,
+// for the quantization bins 0-4.
+// So a value x indicates the bin is actually factor x/128 of the
+// nominal quantization step.  For the zero bin, the width is only
+// for one side of zero, so the actual width is twice that.
+//
+// Functions with nuq correspond to "non-uniform quantization".
+// TODO(sarahparker, debargha): Optimize these tables
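+//
+// For example, with knots { 64, 128, 128 } the zero bin extends
+// 64/128 == 1/2 of a step on each side of zero (one full step in total),
+// and the next two bins are each one full step wide.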
+
+typedef struct {
+  uint8_t knots[NUQ_KNOTS];  // incremental bin-width offsets
+  uint8_t doff;              // dequantization offset
+} qprofile_type;
+
+static const qprofile_type nuq_lossless[COEF_BANDS] = {
+  { { 64, 128, 128 }, 0 },  // dc, band 0
+  { { 64, 128, 128 }, 0 },  // band 1
+  { { 64, 128, 128 }, 0 },  // band 2
+  { { 64, 128, 128 }, 0 },  // band 3
+  { { 64, 128, 128 }, 0 },  // band 4
+  { { 64, 128, 128 }, 0 },  // band 5
+};
+
+static const qprofile_type nuq[QUANT_PROFILES][QUANT_RANGES][COEF_BANDS] = {
+  { {
+        { { 64, 128, 128 }, 0 },  // dc, band 0
+        { { 64, 128, 128 }, 0 },  // band 1
+        { { 64, 128, 128 }, 0 },  // band 2
+        { { 64, 128, 128 }, 0 },  // band 3
+        { { 64, 128, 128 }, 0 },  // band 4
+        { { 64, 128, 128 }, 0 }   // band 5
+    },
+    {
+        { { 64, 128, 128 }, 0 },  // dc, band 0
+        { { 64, 128, 128 }, 0 },  // band 1
+        { { 64, 128, 128 }, 0 },  // band 2
+        { { 64, 128, 128 }, 0 },  // band 3
+        { { 64, 128, 128 }, 0 },  // band 4
+        { { 64, 128, 128 }, 0 }   // band 5
+    } },
+#if QUANT_PROFILES > 1
+  { {
+        { { 64, 128, 128 }, 0 },  // dc, band 0
+        { { 64, 128, 128 }, 0 },  // band 1
+        { { 64, 128, 128 }, 0 },  // band 2
+        { { 64, 128, 128 }, 0 },  // band 3
+        { { 64, 128, 128 }, 0 },  // band 4
+        { { 64, 128, 128 }, 0 }   // band 5
+    },
+    {
+        { { 64, 128, 128 }, 0 },  // dc, band 0
+        { { 64, 128, 128 }, 0 },  // band 1
+        { { 64, 128, 128 }, 0 },  // band 2
+        { { 64, 128, 128 }, 0 },  // band 3
+        { { 64, 128, 128 }, 0 },  // band 4
+        { { 64, 128, 128 }, 0 }   // band 5
+    } },
+#if QUANT_PROFILES > 2
+  { {
+        { { 64, 128, 128 }, 0 },  // dc, band 0
+        { { 64, 128, 128 }, 0 },  // band 1
+        { { 64, 128, 128 }, 0 },  // band 2
+        { { 64, 128, 128 }, 0 },  // band 3
+        { { 64, 128, 128 }, 0 },  // band 4
+        { { 64, 128, 128 }, 0 },  // band 5
+    },
+    {
+        { { 64, 128, 128 }, 0 },  // dc, band 0
+        { { 64, 128, 128 }, 0 },  // band 1
+        { { 64, 128, 128 }, 0 },  // band 2
+        { { 64, 128, 128 }, 0 },  // band 3
+        { { 64, 128, 128 }, 0 },  // band 4
+        { { 64, 128, 128 }, 0 },  // band 5
+    } }
+#endif  // QUANT_PROFILES > 2
+#endif  // QUANT_PROFILES > 1
+};
+
+static INLINE int qrange_from_qindex(int qindex) {
+  // return high quality (1) or low quality (0)
+  return qindex < 140 ? 1 : 0;
+}
+
+static const uint8_t *get_nuq_knots(int qindex, int band, int q_profile) {
+  if (!qindex)
+    return nuq_lossless[band].knots;
+  else
+    return nuq[q_profile][qrange_from_qindex(qindex)][band].knots;
+}
+
+static INLINE int16_t quant_to_doff_fixed(int qindex, int band, int q_profile) {
+  if (!qindex)
+    return nuq_lossless[band].doff;
+  else
+    return nuq[q_profile][qrange_from_qindex(qindex)][band].doff;
+}
+
+// get cumulative bins
+static INLINE void get_cuml_bins_nuq(int q, int qindex, int band,
+                                     tran_low_t *cuml_bins, int q_profile) {
+  const uint8_t *knots = get_nuq_knots(qindex, band, q_profile);
+  int16_t cuml_knots[NUQ_KNOTS];
+  int i;
+  cuml_knots[0] = knots[0];
+  for (i = 1; i < NUQ_KNOTS; ++i) cuml_knots[i] = cuml_knots[i - 1] + knots[i];
+  for (i = 0; i < NUQ_KNOTS; ++i)
+    cuml_bins[i] = ROUND_POWER_OF_TWO(cuml_knots[i] * q, 7);
+}
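+
+// For illustration of get_cuml_bins_nuq(): with q == 128 and knots
+// { 64, 128, 128 }, cuml_knots is { 64, 192, 320 }, and since
+// ROUND_POWER_OF_TWO(x * 128, 7) == x the cumulative bin boundaries come out
+// as { 64, 192, 320 }.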
+
+void vp10_get_dequant_val_nuq(int q, int qindex, int band, tran_low_t *dq,
+                              tran_low_t *cuml_bins, int q_profile) {
+  const uint8_t *knots = get_nuq_knots(qindex, band, q_profile);
+  tran_low_t cuml_bins_[NUQ_KNOTS], *cuml_bins_ptr;
+  tran_low_t doff;
+  int i;
+  cuml_bins_ptr = (cuml_bins ? cuml_bins : cuml_bins_);
+  get_cuml_bins_nuq(q, qindex, band, cuml_bins_ptr, q_profile);
+  dq[0] = 0;
+  for (i = 1; i < NUQ_KNOTS; ++i) {
+    doff = quant_to_doff_fixed(qindex, band, q_profile);
+    doff = ROUND_POWER_OF_TWO(doff * knots[i], 7);
+    dq[i] =
+        cuml_bins_ptr[i - 1] + ROUND_POWER_OF_TWO((knots[i] - doff * 2) * q, 8);
+  }
+  doff = quant_to_doff_fixed(qindex, band, q_profile);
+  dq[NUQ_KNOTS] =
+      cuml_bins_ptr[NUQ_KNOTS - 1] + ROUND_POWER_OF_TWO((64 - doff) * q, 7);
+}
+
+tran_low_t vp10_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq) {
+  if (v <= NUQ_KNOTS)
+    return dq[v];
+  else
+    return dq[NUQ_KNOTS] + (v - NUQ_KNOTS) * q;
+}
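+
+// A minimal usage sketch, assuming NUQ_KNOTS == 3 as the three-entry knot
+// tables above suggest (q value chosen arbitrarily):
+//   tran_low_t dq[NUQ_KNOTS + 1];
+//   vp10_get_dequant_val_nuq(128, 60, 0, dq, NULL, 0);
+//   vp10_dequant_abscoeff_nuq(2, 128, dq);  // table-driven: returns dq[2]
+//   vp10_dequant_abscoeff_nuq(5, 128, dq);  // uniform tail: dq[3] + 2 * 128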
+
+tran_low_t vp10_dequant_coeff_nuq(int v, int q, const tran_low_t *dq) {
+  tran_low_t dqmag = vp10_dequant_abscoeff_nuq(abs(v), q, dq);
+  return (v < 0 ? -dqmag : dqmag);
+}
+#endif  // CONFIG_NEW_QUANT
+
+static const int16_t dc_qlookup[QINDEX_RANGE] = {
+  4,    8,    8,    9,    10,  11,  12,  12,  13,  14,  15,   16,   17,   18,
+  19,   19,   20,   21,   22,  23,  24,  25,  26,  26,  27,   28,   29,   30,
+  31,   32,   32,   33,   34,  35,  36,  37,  38,  38,  39,   40,   41,   42,
+  43,   43,   44,   45,   46,  47,  48,  48,  49,  50,  51,   52,   53,   53,
+  54,   55,   56,   57,   57,  58,  59,  60,  61,  62,  62,   63,   64,   65,
+  66,   66,   67,   68,   69,  70,  70,  71,  72,  73,  74,   74,   75,   76,
+  77,   78,   78,   79,   80,  81,  81,  82,  83,  84,  85,   85,   87,   88,
+  90,   92,   93,   95,   96,  98,  99,  101, 102, 104, 105,  107,  108,  110,
+  111,  113,  114,  116,  117, 118, 120, 121, 123, 125, 127,  129,  131,  134,
+  136,  138,  140,  142,  144, 146, 148, 150, 152, 154, 156,  158,  161,  164,
+  166,  169,  172,  174,  177, 180, 182, 185, 187, 190, 192,  195,  199,  202,
+  205,  208,  211,  214,  217, 220, 223, 226, 230, 233, 237,  240,  243,  247,
+  250,  253,  257,  261,  265, 269, 272, 276, 280, 284, 288,  292,  296,  300,
+  304,  309,  313,  317,  322, 326, 330, 335, 340, 344, 349,  354,  359,  364,
+  369,  374,  379,  384,  389, 395, 400, 406, 411, 417, 423,  429,  435,  441,
+  447,  454,  461,  467,  475, 482, 489, 497, 505, 513, 522,  530,  539,  549,
+  559,  569,  579,  590,  602, 614, 626, 640, 654, 668, 684,  700,  717,  736,
+  755,  775,  796,  819,  843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139,
+  1184, 1232, 1282, 1336,
+};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static const int16_t dc_qlookup_10[QINDEX_RANGE] = {
+  4,    9,    10,   13,   15,   17,   20,   22,   25,   28,   31,   34,   37,
+  40,   43,   47,   50,   53,   57,   60,   64,   68,   71,   75,   78,   82,
+  86,   90,   93,   97,   101,  105,  109,  113,  116,  120,  124,  128,  132,
+  136,  140,  143,  147,  151,  155,  159,  163,  166,  170,  174,  178,  182,
+  185,  189,  193,  197,  200,  204,  208,  212,  215,  219,  223,  226,  230,
+  233,  237,  241,  244,  248,  251,  255,  259,  262,  266,  269,  273,  276,
+  280,  283,  287,  290,  293,  297,  300,  304,  307,  310,  314,  317,  321,
+  324,  327,  331,  334,  337,  343,  350,  356,  362,  369,  375,  381,  387,
+  394,  400,  406,  412,  418,  424,  430,  436,  442,  448,  454,  460,  466,
+  472,  478,  484,  490,  499,  507,  516,  525,  533,  542,  550,  559,  567,
+  576,  584,  592,  601,  609,  617,  625,  634,  644,  655,  666,  676,  687,
+  698,  708,  718,  729,  739,  749,  759,  770,  782,  795,  807,  819,  831,
+  844,  856,  868,  880,  891,  906,  920,  933,  947,  961,  975,  988,  1001,
+  1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202,
+  1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436,
+  1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
+  1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088,
+  2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675,
+  2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823,
+  3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
+};
+
+static const int16_t dc_qlookup_12[QINDEX_RANGE] = {
+  4,     12,    18,    25,    33,    41,    50,    60,    70,    80,    91,
+  103,   115,   127,   140,   153,   166,   180,   194,   208,   222,   237,
+  251,   266,   281,   296,   312,   327,   343,   358,   374,   390,   405,
+  421,   437,   453,   469,   484,   500,   516,   532,   548,   564,   580,
+  596,   611,   627,   643,   659,   674,   690,   706,   721,   737,   752,
+  768,   783,   798,   814,   829,   844,   859,   874,   889,   904,   919,
+  934,   949,   964,   978,   993,   1008,  1022,  1037,  1051,  1065,  1080,
+  1094,  1108,  1122,  1136,  1151,  1165,  1179,  1192,  1206,  1220,  1234,
+  1248,  1261,  1275,  1288,  1302,  1315,  1329,  1342,  1368,  1393,  1419,
+  1444,  1469,  1494,  1519,  1544,  1569,  1594,  1618,  1643,  1668,  1692,
+  1717,  1741,  1765,  1789,  1814,  1838,  1862,  1885,  1909,  1933,  1957,
+  1992,  2027,  2061,  2096,  2130,  2165,  2199,  2233,  2267,  2300,  2334,
+  2367,  2400,  2434,  2467,  2499,  2532,  2575,  2618,  2661,  2704,  2746,
+  2788,  2830,  2872,  2913,  2954,  2995,  3036,  3076,  3127,  3177,  3226,
+  3275,  3324,  3373,  3421,  3469,  3517,  3565,  3621,  3677,  3733,  3788,
+  3843,  3897,  3951,  4005,  4058,  4119,  4181,  4241,  4301,  4361,  4420,
+  4479,  4546,  4612,  4677,  4742,  4807,  4871,  4942,  5013,  5083,  5153,
+  5222,  5291,  5367,  5442,  5517,  5591,  5665,  5745,  5825,  5905,  5984,
+  6063,  6149,  6234,  6319,  6404,  6495,  6587,  6678,  6769,  6867,  6966,
+  7064,  7163,  7269,  7376,  7483,  7599,  7715,  7832,  7958,  8085,  8214,
+  8352,  8492,  8635,  8788,  8945,  9104,  9275,  9450,  9639,  9832,  10031,
+  10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118,
+  13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949,
+  19718, 20521, 21387,
+};
+#endif
+
+static const int16_t ac_qlookup[QINDEX_RANGE] = {
+  4,    8,    9,    10,   11,   12,   13,   14,   15,   16,   17,   18,   19,
+  20,   21,   22,   23,   24,   25,   26,   27,   28,   29,   30,   31,   32,
+  33,   34,   35,   36,   37,   38,   39,   40,   41,   42,   43,   44,   45,
+  46,   47,   48,   49,   50,   51,   52,   53,   54,   55,   56,   57,   58,
+  59,   60,   61,   62,   63,   64,   65,   66,   67,   68,   69,   70,   71,
+  72,   73,   74,   75,   76,   77,   78,   79,   80,   81,   82,   83,   84,
+  85,   86,   87,   88,   89,   90,   91,   92,   93,   94,   95,   96,   97,
+  98,   99,   100,  101,  102,  104,  106,  108,  110,  112,  114,  116,  118,
+  120,  122,  124,  126,  128,  130,  132,  134,  136,  138,  140,  142,  144,
+  146,  148,  150,  152,  155,  158,  161,  164,  167,  170,  173,  176,  179,
+  182,  185,  188,  191,  194,  197,  200,  203,  207,  211,  215,  219,  223,
+  227,  231,  235,  239,  243,  247,  251,  255,  260,  265,  270,  275,  280,
+  285,  290,  295,  300,  305,  311,  317,  323,  329,  335,  341,  347,  353,
+  359,  366,  373,  380,  387,  394,  401,  408,  416,  424,  432,  440,  448,
+  456,  465,  474,  483,  492,  501,  510,  520,  530,  540,  550,  560,  571,
+  582,  593,  604,  615,  627,  639,  651,  663,  676,  689,  702,  715,  729,
+  743,  757,  771,  786,  801,  816,  832,  848,  864,  881,  898,  915,  933,
+  951,  969,  988,  1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196,
+  1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537,
+  1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
+};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static const int16_t ac_qlookup_10[QINDEX_RANGE] = {
+  4,    9,    11,   13,   16,   18,   21,   24,   27,   30,   33,   37,   40,
+  44,   48,   51,   55,   59,   63,   67,   71,   75,   79,   83,   88,   92,
+  96,   100,  105,  109,  114,  118,  122,  127,  131,  136,  140,  145,  149,
+  154,  158,  163,  168,  172,  177,  181,  186,  190,  195,  199,  204,  208,
+  213,  217,  222,  226,  231,  235,  240,  244,  249,  253,  258,  262,  267,
+  271,  275,  280,  284,  289,  293,  297,  302,  306,  311,  315,  319,  324,
+  328,  332,  337,  341,  345,  349,  354,  358,  362,  367,  371,  375,  379,
+  384,  388,  392,  396,  401,  409,  417,  425,  433,  441,  449,  458,  466,
+  474,  482,  490,  498,  506,  514,  523,  531,  539,  547,  555,  563,  571,
+  579,  588,  596,  604,  616,  628,  640,  652,  664,  676,  688,  700,  713,
+  725,  737,  749,  761,  773,  785,  797,  809,  825,  841,  857,  873,  889,
+  905,  922,  938,  954,  970,  986,  1002, 1018, 1038, 1058, 1078, 1098, 1118,
+  1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411,
+  1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791,
+  1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283,
+  2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915,
+  2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731,
+  3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784,
+  4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148,
+  6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
+};
+
+static const int16_t ac_qlookup_12[QINDEX_RANGE] = {
+  4,     13,    19,    27,    35,    44,    54,    64,    75,    87,    99,
+  112,   126,   139,   154,   168,   183,   199,   214,   230,   247,   263,
+  280,   297,   314,   331,   349,   366,   384,   402,   420,   438,   456,
+  475,   493,   511,   530,   548,   567,   586,   604,   623,   642,   660,
+  679,   698,   716,   735,   753,   772,   791,   809,   828,   846,   865,
+  884,   902,   920,   939,   957,   976,   994,   1012,  1030,  1049,  1067,
+  1085,  1103,  1121,  1139,  1157,  1175,  1193,  1211,  1229,  1246,  1264,
+  1282,  1299,  1317,  1335,  1352,  1370,  1387,  1405,  1422,  1440,  1457,
+  1474,  1491,  1509,  1526,  1543,  1560,  1577,  1595,  1627,  1660,  1693,
+  1725,  1758,  1791,  1824,  1856,  1889,  1922,  1954,  1987,  2020,  2052,
+  2085,  2118,  2150,  2183,  2216,  2248,  2281,  2313,  2346,  2378,  2411,
+  2459,  2508,  2556,  2605,  2653,  2701,  2750,  2798,  2847,  2895,  2943,
+  2992,  3040,  3088,  3137,  3185,  3234,  3298,  3362,  3426,  3491,  3555,
+  3619,  3684,  3748,  3812,  3876,  3941,  4005,  4069,  4149,  4230,  4310,
+  4390,  4470,  4550,  4631,  4711,  4791,  4871,  4967,  5064,  5160,  5256,
+  5352,  5448,  5544,  5641,  5737,  5849,  5961,  6073,  6185,  6297,  6410,
+  6522,  6650,  6778,  6906,  7034,  7162,  7290,  7435,  7579,  7723,  7867,
+  8011,  8155,  8315,  8475,  8635,  8795,  8956,  9132,  9308,  9484,  9660,
+  9836,  10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885,
+  12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637,
+  14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062,
+  18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334,
+  22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599,
+  28143, 28687, 29247,
+};
+#endif
+
+int16_t vp10_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  switch (bit_depth) {
+    case VPX_BITS_8: return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
+    case VPX_BITS_10: return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)];
+    case VPX_BITS_12: return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)];
+    default:
+      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+      return -1;
+  }
+#else
+  (void)bit_depth;
+  return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
+#endif
+}
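+
+// For illustration: vp10_dc_quant(0, 0, VPX_BITS_8) returns dc_qlookup[0] == 4
+// (the lossless step size); qindex + delta is clamped to [0, MAXQ] before the
+// table lookup, so out-of-range deltas are safe.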
+
+int16_t vp10_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  switch (bit_depth) {
+    case VPX_BITS_8: return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
+    case VPX_BITS_10: return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)];
+    case VPX_BITS_12: return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)];
+    default:
+      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+      return -1;
+  }
+#else
+  (void)bit_depth;
+  return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
+#endif
+}
+
+int vp10_get_qindex(const struct segmentation *seg, int segment_id,
+                    int base_qindex) {
+  if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
+    const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
+    const int seg_qindex =
+        seg->abs_delta == SEGMENT_ABSDATA ? data : base_qindex + data;
+    return clamp(seg_qindex, 0, MAXQ);
+  } else {
+    return base_qindex;
+  }
+}
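+
+// For illustration of vp10_get_qindex(): with SEG_LVL_ALT_Q active, a
+// delta-coded segment (abs_delta != SEGMENT_ABSDATA), base_qindex == 100 and
+// segment data of -20, the effective qindex is clamp(80, 0, MAXQ) == 80;
+// with SEGMENT_ABSDATA the segment data replaces the base qindex outright.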
+
+#if CONFIG_AOM_QM
+qm_val_t *aom_iqmatrix(VP10_COMMON *cm, int qmlevel, int is_chroma,
+                       int log2sizem2, int is_intra) {
+  return &cm->giqmatrix[qmlevel][!!is_chroma][!!is_intra][log2sizem2][0];
+}
+qm_val_t *aom_qmatrix(VP10_COMMON *cm, int qmlevel, int is_chroma,
+                      int log2sizem2, int is_intra) {
+  return &cm->gqmatrix[qmlevel][!!is_chroma][!!is_intra][log2sizem2][0];
+}
+
+static uint16_t
+    iwt_matrix_ref[NUM_QM_LEVELS][2][2][4 * 4 + 8 * 8 + 16 * 16 + 32 * 32];
+static uint16_t
+    wt_matrix_ref[NUM_QM_LEVELS][2][2][4 * 4 + 8 * 8 + 16 * 16 + 32 * 32];
+
+void aom_qm_init(VP10_COMMON *cm) {
+  int q, c, f, t, size;
+  int current;
+  for (q = 0; q < NUM_QM_LEVELS; ++q) {
+    for (c = 0; c < 2; ++c) {
+      for (f = 0; f < 2; ++f) {
+        current = 0;
+        for (t = 0; t < TX_SIZES; ++t) {
+          size = 1 << (t + 2);
+          cm->gqmatrix[q][c][f][t] = &wt_matrix_ref[q][c][f][current];
+          cm->giqmatrix[q][c][f][t] = &iwt_matrix_ref[q][c][f][current];
+          current += size * size;
+        }
+      }
+    }
+  }
+}
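+
+// The flattened per-(qm level, chroma, intra) arrays above hold all square
+// sizes back to back: 4x4 at offset 0, 8x8 at 16, 16x16 at 80 and 32x32 at
+// 336, i.e. 4*4 + 8*8 + 16*16 + 32*32 == 1360 entries in total.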
+
+static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][2][4 * 4 + 8 * 8 + 16 * 16 +
+                                                    32 * 32] = {
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 71, 124, 214, 71, 112, 165, 241, 124, 165, 254, 331, 214, 241, 331,
+        414,
+        /* Size 8 */
+        64, 47, 51, 69, 97, 132, 173, 218, 47, 54, 52, 62, 81, 109, 142, 181,
+        51, 52, 75, 90, 108, 133, 165, 201, 69, 62, 90, 119, 144, 169, 198, 232,
+        97, 81, 108, 144, 178, 208, 238, 268, 132, 109, 133, 169, 208, 244, 276,
+        305, 173, 142, 165, 198, 238, 276, 309, 338, 218, 181, 201, 232, 268,
+        305, 338, 367,
+        /* Size 16 */
+        64, 54, 47, 49, 51, 59, 69, 81, 97, 111, 132, 150, 173, 193, 218, 218,
+        54, 52, 50, 51, 51, 58, 65, 75, 88, 101, 119, 135, 156, 175, 198, 198,
+        47, 50, 54, 53, 52, 56, 62, 70, 81, 93, 109, 123, 142, 159, 181, 181,
+        49, 51, 53, 57, 61, 67, 73, 82, 93, 104, 120, 134, 153, 170, 191, 191,
+        51, 51, 52, 61, 75, 82, 90, 98, 108, 119, 133, 147, 165, 181, 201, 201,
+        59, 58, 56, 67, 82, 91, 102, 112, 123, 135, 149, 163, 180, 196, 215,
+        215, 69, 65, 62, 73, 90, 102, 119, 130, 144, 155, 169, 182, 198, 214,
+        232, 232, 81, 75, 70, 82, 98, 112, 130, 143, 159, 172, 186, 200, 216,
+        231, 249, 249, 97, 88, 81, 93, 108, 123, 144, 159, 178, 192, 208, 222,
+        238, 252, 268, 268, 111, 101, 93, 104, 119, 135, 155, 172, 192, 207,
+        225, 239, 255, 269, 285, 285, 132, 119, 109, 120, 133, 149, 169, 186,
+        208, 225, 244, 259, 276, 290, 305, 305, 150, 135, 123, 134, 147, 163,
+        182, 200, 222, 239, 259, 274, 291, 305, 321, 321, 173, 156, 142, 153,
+        165, 180, 198, 216, 238, 255, 276, 291, 309, 323, 338, 338, 193, 175,
+        159, 170, 181, 196, 214, 231, 252, 269, 290, 305, 323, 337, 352, 352,
+        218, 198, 181, 191, 201, 215, 232, 249, 268, 285, 305, 321, 338, 352,
+        367, 367, 218, 198, 181, 191, 201, 215, 232, 249, 268, 285, 305, 321,
+        338, 352, 367, 367,
+        /* Size 32 */
+        64, 59, 54, 50, 47, 48, 49, 50, 51, 55, 59, 63, 69, 74, 81, 88, 97, 104,
+        111, 121, 132, 140, 150, 161, 173, 183, 193, 205, 218, 218, 218, 218,
+        59, 56, 53, 51, 49, 49, 50, 51, 51, 54, 58, 62, 67, 72, 78, 84, 92, 99,
+        106, 115, 125, 133, 142, 152, 164, 173, 183, 195, 208, 208, 208, 208,
+        54, 53, 52, 51, 50, 51, 51, 51, 51, 54, 58, 61, 65, 70, 75, 81, 88, 94,
+        101, 110, 119, 127, 135, 145, 156, 165, 175, 186, 198, 198, 198, 198,
+        50, 51, 51, 52, 52, 52, 52, 52, 52, 54, 57, 60, 63, 68, 72, 78, 85, 90,
+        97, 105, 114, 121, 129, 138, 149, 157, 167, 177, 189, 189, 189, 189, 47,
+        49, 50, 52, 54, 54, 53, 52, 52, 54, 56, 59, 62, 66, 70, 75, 81, 87, 93,
+        100, 109, 115, 123, 132, 142, 150, 159, 170, 181, 181, 181, 181, 48, 49,
+        51, 52, 54, 54, 55, 56, 56, 59, 61, 64, 67, 71, 76, 81, 87, 92, 98, 105,
+        114, 121, 128, 137, 147, 155, 164, 174, 186, 186, 186, 186, 49, 50, 51,
+        52, 53, 55, 57, 59, 61, 64, 67, 70, 73, 77, 82, 87, 93, 98, 104, 111,
+        120, 126, 134, 143, 153, 161, 170, 179, 191, 191, 191, 191, 50, 51, 51,
+        52, 52, 56, 59, 63, 68, 71, 74, 77, 81, 85, 89, 94, 100, 105, 111, 118,
+        126, 133, 140, 149, 158, 166, 175, 185, 196, 196, 196, 196, 51, 51, 51,
+        52, 52, 56, 61, 68, 75, 79, 82, 86, 90, 94, 98, 103, 108, 113, 119, 126,
+        133, 140, 147, 155, 165, 172, 181, 191, 201, 201, 201, 201, 55, 54, 54,
+        54, 54, 59, 64, 71, 79, 82, 86, 91, 96, 100, 105, 110, 115, 120, 126,
+        133, 140, 147, 155, 163, 172, 180, 188, 198, 208, 208, 208, 208, 59, 58,
+        58, 57, 56, 61, 67, 74, 82, 86, 91, 96, 102, 107, 112, 117, 123, 129,
+        135, 141, 149, 156, 163, 171, 180, 188, 196, 205, 215, 215, 215, 215,
+        63, 62, 61, 60, 59, 64, 70, 77, 86, 91, 96, 103, 110, 115, 120, 126,
+        133, 138, 144, 151, 158, 165, 172, 180, 189, 196, 204, 213, 223, 223,
+        223, 223, 69, 67, 65, 63, 62, 67, 73, 81, 90, 96, 102, 110, 119, 124,
+        130, 137, 144, 149, 155, 162, 169, 175, 182, 190, 198, 206, 214, 222,
+        232, 232, 232, 232, 74, 72, 70, 68, 66, 71, 77, 85, 94, 100, 107, 115,
+        124, 130, 136, 143, 151, 157, 163, 170, 177, 184, 191, 199, 207, 214,
+        222, 231, 240, 240, 240, 240, 81, 78, 75, 72, 70, 76, 82, 89, 98, 105,
+        112, 120, 130, 136, 143, 151, 159, 165, 172, 179, 186, 193, 200, 208,
+        216, 223, 231, 240, 249, 249, 249, 249, 88, 84, 81, 78, 75, 81, 87, 94,
+        103, 110, 117, 126, 137, 143, 151, 159, 168, 174, 181, 189, 197, 203,
+        211, 218, 226, 234, 241, 249, 258, 258, 258, 258, 97, 92, 88, 85, 81,
+        87, 93, 100, 108, 115, 123, 133, 144, 151, 159, 168, 178, 184, 192, 200,
+        208, 215, 222, 229, 238, 245, 252, 260, 268, 268, 268, 268, 104, 99, 94,
+        90, 87, 92, 98, 105, 113, 120, 129, 138, 149, 157, 165, 174, 184, 191,
+        199, 207, 216, 223, 230, 238, 246, 253, 260, 268, 276, 276, 276, 276,
+        111, 106, 101, 97, 93, 98, 104, 111, 119, 126, 135, 144, 155, 163, 172,
+        181, 192, 199, 207, 215, 225, 232, 239, 247, 255, 262, 269, 277, 285,
+        285, 285, 285, 121, 115, 110, 105, 100, 105, 111, 118, 126, 133, 141,
+        151, 162, 170, 179, 189, 200, 207, 215, 224, 234, 241, 248, 256, 265,
+        272, 279, 287, 295, 295, 295, 295, 132, 125, 119, 114, 109, 114, 120,
+        126, 133, 140, 149, 158, 169, 177, 186, 197, 208, 216, 225, 234, 244,
+        251, 259, 267, 276, 282, 290, 297, 305, 305, 305, 305, 140, 133, 127,
+        121, 115, 121, 126, 133, 140, 147, 156, 165, 175, 184, 193, 203, 215,
+        223, 232, 241, 251, 258, 266, 275, 283, 290, 297, 305, 313, 313, 313,
+        313, 150, 142, 135, 129, 123, 128, 134, 140, 147, 155, 163, 172, 182,
+        191, 200, 211, 222, 230, 239, 248, 259, 266, 274, 283, 291, 298, 305,
+        313, 321, 321, 321, 321, 161, 152, 145, 138, 132, 137, 143, 149, 155,
+        163, 171, 180, 190, 199, 208, 218, 229, 238, 247, 256, 267, 275, 283,
+        291, 300, 307, 314, 322, 329, 329, 329, 329, 173, 164, 156, 149, 142,
+        147, 153, 158, 165, 172, 180, 189, 198, 207, 216, 226, 238, 246, 255,
+        265, 276, 283, 291, 300, 309, 316, 323, 331, 338, 338, 338, 338, 183,
+        173, 165, 157, 150, 155, 161, 166, 172, 180, 188, 196, 206, 214, 223,
+        234, 245, 253, 262, 272, 282, 290, 298, 307, 316, 323, 330, 337, 345,
+        345, 345, 345, 193, 183, 175, 167, 159, 164, 170, 175, 181, 188, 196,
+        204, 214, 222, 231, 241, 252, 260, 269, 279, 290, 297, 305, 314, 323,
+        330, 337, 345, 352, 352, 352, 352, 205, 195, 186, 177, 170, 174, 179,
+        185, 191, 198, 205, 213, 222, 231, 240, 249, 260, 268, 277, 287, 297,
+        305, 313, 322, 331, 337, 345, 352, 360, 360, 360, 360, 218, 208, 198,
+        189, 181, 186, 191, 196, 201, 208, 215, 223, 232, 240, 249, 258, 268,
+        276, 285, 295, 305, 313, 321, 329, 338, 345, 352, 360, 367, 367, 367,
+        367, 218, 208, 198, 189, 181, 186, 191, 196, 201, 208, 215, 223, 232,
+        240, 249, 258, 268, 276, 285, 295, 305, 313, 321, 329, 338, 345, 352,
+        360, 367, 367, 367, 367, 218, 208, 198, 189, 181, 186, 191, 196, 201,
+        208, 215, 223, 232, 240, 249, 258, 268, 276, 285, 295, 305, 313, 321,
+        329, 338, 345, 352, 360, 367, 367, 367, 367, 218, 208, 198, 189, 181,
+        186, 191, 196, 201, 208, 215, 223, 232, 240, 249, 258, 268, 276, 285,
+        295, 305, 313, 321, 329, 338, 345, 352, 360, 367, 367, 367, 367 },
+      { /* Intra matrices */
+        /* Size 4 */
+        16, 18, 33, 60, 18, 29, 45, 68, 33, 45, 72, 98, 60, 68, 98, 129,
+        /* Size 8 */
+        20, 14, 16, 21, 31, 43, 58, 75, 14, 17, 16, 19, 25, 35, 46, 61, 16, 16,
+        24, 28, 34, 43, 54, 68, 21, 19, 28, 38, 47, 56, 67, 80, 31, 25, 34, 47,
+        59, 71, 83, 95, 43, 35, 43, 56, 71, 85, 99, 112, 58, 46, 54, 67, 83, 99,
+        113, 127, 75, 61, 68, 80, 95, 112, 127, 141,
+        /* Size 16 */
+        19, 16, 14, 14, 15, 17, 20, 24, 29, 34, 41, 47, 55, 62, 71, 71, 16, 15,
+        15, 15, 15, 17, 19, 22, 26, 31, 36, 42, 49, 55, 64, 64, 14, 15, 16, 16,
+        15, 17, 18, 21, 24, 28, 33, 38, 44, 50, 58, 58, 14, 15, 16, 17, 18, 20,
+        22, 24, 28, 32, 37, 41, 48, 54, 61, 61, 15, 15, 15, 18, 22, 24, 27, 30,
+        33, 36, 41, 46, 52, 58, 65, 65, 17, 17, 17, 20, 24, 27, 31, 34, 38, 42,
+        46, 51, 57, 63, 70, 70, 20, 19, 18, 22, 27, 31, 36, 40, 45, 49, 53, 58,
+        64, 70, 76, 76, 24, 22, 21, 24, 30, 34, 40, 44, 50, 54, 60, 65, 71, 76,
+        83, 83, 29, 26, 24, 28, 33, 38, 45, 50, 56, 61, 67, 73, 79, 84, 91, 91,
+        34, 31, 28, 32, 36, 42, 49, 54, 61, 67, 74, 79, 86, 91, 98, 98, 41, 36,
+        33, 37, 41, 46, 53, 60, 67, 74, 81, 87, 94, 100, 106, 106, 47, 42, 38,
+        41, 46, 51, 58, 65, 73, 79, 87, 93, 100, 106, 113, 113, 55, 49, 44, 48,
+        52, 57, 64, 71, 79, 86, 94, 100, 108, 114, 121, 121, 62, 55, 50, 54, 58,
+        63, 70, 76, 84, 91, 100, 106, 114, 120, 127, 127, 71, 64, 58, 61, 65,
+        70, 76, 83, 91, 98, 106, 113, 121, 127, 134, 134, 71, 64, 58, 61, 65,
+        70, 76, 83, 91, 98, 106, 113, 121, 127, 134, 134,
+        /* Size 32 */
+        18, 17, 15, 14, 13, 14, 14, 14, 15, 16, 17, 18, 20, 22, 23, 26, 28, 30,
+        33, 36, 40, 42, 45, 49, 53, 57, 60, 65, 69, 69, 69, 69, 17, 16, 15, 14,
+        14, 14, 14, 14, 15, 16, 17, 18, 19, 21, 23, 25, 27, 29, 31, 34, 37, 40,
+        43, 46, 50, 53, 57, 61, 66, 66, 66, 66, 15, 15, 15, 15, 14, 14, 14, 15,
+        15, 15, 16, 18, 19, 20, 22, 24, 26, 28, 30, 32, 35, 38, 41, 44, 48, 51,
+        54, 58, 62, 62, 62, 62, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 17,
+        18, 19, 21, 23, 25, 26, 28, 31, 34, 36, 39, 42, 45, 48, 51, 55, 59, 59,
+        59, 59, 13, 14, 14, 15, 16, 15, 15, 15, 15, 15, 16, 17, 18, 19, 20, 22,
+        24, 25, 27, 29, 32, 34, 37, 40, 43, 46, 49, 52, 56, 56, 56, 56, 14, 14,
+        14, 15, 15, 16, 16, 16, 16, 17, 18, 18, 19, 20, 22, 23, 25, 27, 29, 31,
+        34, 36, 38, 41, 45, 47, 50, 54, 58, 58, 58, 58, 14, 14, 14, 15, 15, 16,
+        16, 17, 18, 18, 19, 20, 21, 22, 24, 25, 27, 29, 31, 33, 36, 38, 40, 43,
+        46, 49, 52, 56, 60, 60, 60, 60, 14, 14, 15, 15, 15, 16, 17, 18, 19, 20,
+        21, 22, 23, 25, 26, 28, 29, 31, 33, 35, 38, 40, 42, 45, 48, 51, 54, 57,
+        61, 61, 61, 61, 15, 15, 15, 15, 15, 16, 18, 19, 22, 23, 24, 25, 26, 27,
+        29, 30, 32, 34, 35, 38, 40, 42, 45, 47, 50, 53, 56, 59, 63, 63, 63, 63,
+        16, 16, 15, 15, 15, 17, 18, 20, 23, 24, 25, 27, 28, 29, 31, 32, 34, 36,
+        38, 40, 42, 45, 47, 50, 53, 56, 59, 62, 66, 66, 66, 66, 17, 17, 16, 16,
+        16, 18, 19, 21, 24, 25, 27, 28, 30, 32, 33, 35, 37, 39, 41, 43, 45, 47,
+        50, 53, 56, 58, 61, 65, 68, 68, 68, 68, 18, 18, 18, 17, 17, 18, 20, 22,
+        25, 27, 28, 30, 33, 34, 36, 38, 40, 42, 44, 46, 48, 51, 53, 56, 59, 62,
+        64, 68, 71, 71, 71, 71, 20, 19, 19, 18, 18, 19, 21, 23, 26, 28, 30, 33,
+        35, 37, 39, 41, 43, 45, 47, 50, 52, 54, 57, 59, 62, 65, 68, 71, 74, 74,
+        74, 74, 22, 21, 20, 19, 19, 20, 22, 25, 27, 29, 32, 34, 37, 39, 41, 43,
+        46, 48, 50, 52, 55, 57, 60, 62, 65, 68, 71, 74, 77, 77, 77, 77, 23, 23,
+        22, 21, 20, 22, 24, 26, 29, 31, 33, 36, 39, 41, 43, 46, 49, 51, 53, 55,
+        58, 60, 63, 66, 69, 71, 74, 77, 81, 81, 81, 81, 26, 25, 24, 23, 22, 23,
+        25, 28, 30, 32, 35, 38, 41, 43, 46, 48, 52, 54, 56, 59, 62, 64, 67, 69,
+        72, 75, 78, 81, 84, 84, 84, 84, 28, 27, 26, 25, 24, 25, 27, 29, 32, 34,
+        37, 40, 43, 46, 49, 52, 55, 57, 60, 63, 66, 68, 71, 74, 77, 79, 82, 85,
+        88, 88, 88, 88, 30, 29, 28, 26, 25, 27, 29, 31, 34, 36, 39, 42, 45, 48,
+        51, 54, 57, 60, 62, 65, 69, 71, 74, 77, 80, 83, 85, 88, 92, 92, 92, 92,
+        33, 31, 30, 28, 27, 29, 31, 33, 35, 38, 41, 44, 47, 50, 53, 56, 60, 62,
+        65, 68, 72, 74, 77, 80, 83, 86, 89, 92, 95, 95, 95, 95, 36, 34, 32, 31,
+        29, 31, 33, 35, 38, 40, 43, 46, 50, 52, 55, 59, 63, 65, 68, 72, 75, 78,
+        81, 84, 87, 90, 93, 96, 99, 99, 99, 99, 40, 37, 35, 34, 32, 34, 36, 38,
+        40, 42, 45, 48, 52, 55, 58, 62, 66, 69, 72, 75, 79, 82, 85, 88, 91, 94,
+        97, 100, 103, 103, 103, 103, 42, 40, 38, 36, 34, 36, 38, 40, 42, 45, 47,
+        51, 54, 57, 60, 64, 68, 71, 74, 78, 82, 85, 88, 91, 94, 97, 100, 103,
+        107, 107, 107, 107, 45, 43, 41, 39, 37, 38, 40, 42, 45, 47, 50, 53, 57,
+        60, 63, 67, 71, 74, 77, 81, 85, 88, 91, 94, 98, 101, 104, 107, 110, 110,
+        110, 110, 49, 46, 44, 42, 40, 41, 43, 45, 47, 50, 53, 56, 59, 62, 66,
+        69, 74, 77, 80, 84, 88, 91, 94, 98, 101, 104, 107, 110, 114, 114, 114,
+        114, 53, 50, 48, 45, 43, 45, 46, 48, 50, 53, 56, 59, 62, 65, 69, 72, 77,
+        80, 83, 87, 91, 94, 98, 101, 105, 108, 111, 114, 118, 118, 118, 118, 57,
+        53, 51, 48, 46, 47, 49, 51, 53, 56, 58, 62, 65, 68, 71, 75, 79, 83, 86,
+        90, 94, 97, 101, 104, 108, 111, 114, 117, 121, 121, 121, 121, 60, 57,
+        54, 51, 49, 50, 52, 54, 56, 59, 61, 64, 68, 71, 74, 78, 82, 85, 89, 93,
+        97, 100, 104, 107, 111, 114, 117, 120, 124, 124, 124, 124, 65, 61, 58,
+        55, 52, 54, 56, 57, 59, 62, 65, 68, 71, 74, 77, 81, 85, 88, 92, 96, 100,
+        103, 107, 110, 114, 117, 120, 124, 127, 127, 127, 127, 69, 66, 62, 59,
+        56, 58, 60, 61, 63, 66, 68, 71, 74, 77, 81, 84, 88, 92, 95, 99, 103,
+        107, 110, 114, 118, 121, 124, 127, 130, 130, 130, 130, 69, 66, 62, 59,
+        56, 58, 60, 61, 63, 66, 68, 71, 74, 77, 81, 84, 88, 92, 95, 99, 103,
+        107, 110, 114, 118, 121, 124, 127, 130, 130, 130, 130, 69, 66, 62, 59,
+        56, 58, 60, 61, 63, 66, 68, 71, 74, 77, 81, 84, 88, 92, 95, 99, 103,
+        107, 110, 114, 118, 121, 124, 127, 130, 130, 130, 130, 69, 66, 62, 59,
+        56, 58, 60, 61, 63, 66, 68, 71, 74, 77, 81, 84, 88, 92, 95, 99, 103,
+        107, 110, 114, 118, 121, 124, 127, 130, 130, 130, 130 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 106, 117, 154, 106, 131, 141, 167, 117, 141, 191, 225, 154, 167,
+        225, 279,
+        /* Size 8 */
+        64, 51, 98, 104, 113, 128, 148, 172, 51, 76, 100, 89, 92, 103, 118, 136,
+        98, 100, 119, 115, 114, 121, 134, 151, 104, 89, 115, 132, 140, 147, 158,
+        173, 113, 92, 114, 140, 160, 174, 186, 201, 128, 103, 121, 147, 174,
+        195, 213, 229, 148, 118, 134, 158, 186, 213, 236, 256, 172, 136, 151,
+        173, 201, 229, 256, 280,
+        /* Size 16 */
+        64, 57, 51, 67, 98, 101, 104, 108, 113, 120, 128, 137, 148, 159, 172,
+        172, 57, 59, 61, 75, 99, 97, 96, 99, 101, 107, 114, 122, 131, 141, 152,
+        152, 51, 61, 76, 86, 100, 94, 89, 91, 92, 97, 103, 110, 118, 126, 136,
+        136, 67, 75, 86, 96, 109, 104, 100, 101, 102, 106, 111, 118, 125, 134,
+        143, 143, 98, 99, 100, 109, 119, 117, 115, 115, 114, 118, 121, 127, 134,
+        142, 151, 151, 101, 97, 94, 104, 117, 120, 123, 124, 126, 129, 133, 139,
+        145, 153, 161, 161, 104, 96, 89, 100, 115, 123, 132, 136, 140, 144, 147,
+        153, 158, 165, 173, 173, 108, 99, 91, 101, 115, 124, 136, 142, 149, 154,
+        160, 165, 171, 178, 186, 186, 113, 101, 92, 102, 114, 126, 140, 149,
+        160, 167, 174, 180, 186, 193, 201, 201, 120, 107, 97, 106, 118, 129,
+        144, 154, 167, 175, 184, 191, 199, 206, 214, 214, 128, 114, 103, 111,
+        121, 133, 147, 160, 174, 184, 195, 204, 213, 221, 229, 229, 137, 122,
+        110, 118, 127, 139, 153, 165, 180, 191, 204, 213, 224, 233, 242, 242,
+        148, 131, 118, 125, 134, 145, 158, 171, 186, 199, 213, 224, 236, 246,
+        256, 256, 159, 141, 126, 134, 142, 153, 165, 178, 193, 206, 221, 233,
+        246, 256, 267, 267, 172, 152, 136, 143, 151, 161, 173, 186, 201, 214,
+        229, 242, 256, 267, 280, 280, 172, 152, 136, 143, 151, 161, 173, 186,
+        201, 214, 229, 242, 256, 267, 280, 280,
+        /* Size 32 */
+        64, 60, 57, 54, 51, 58, 67, 79, 98, 99, 101, 103, 104, 106, 108, 110,
+        113, 116, 120, 124, 128, 133, 137, 143, 148, 153, 159, 165, 172, 172,
+        172, 172, 60, 59, 58, 56, 55, 62, 71, 82, 98, 99, 99, 100, 100, 102,
+        103, 105, 107, 110, 113, 117, 121, 125, 129, 134, 139, 144, 149, 155,
+        161, 161, 161, 161, 57, 58, 59, 60, 61, 67, 75, 85, 99, 98, 97, 97, 96,
+        97, 99, 100, 101, 104, 107, 110, 114, 118, 122, 126, 131, 136, 141, 146,
+        152, 152, 152, 152, 54, 56, 60, 63, 67, 73, 80, 89, 99, 97, 96, 94, 93,
+        94, 94, 95, 97, 99, 102, 105, 108, 112, 115, 120, 124, 128, 133, 138,
+        144, 144, 144, 144, 51, 55, 61, 67, 76, 81, 86, 92, 100, 97, 94, 92, 89,
+        90, 91, 91, 92, 95, 97, 100, 103, 106, 110, 113, 118, 122, 126, 131,
+        136, 136, 136, 136, 58, 62, 67, 73, 81, 85, 91, 97, 104, 101, 99, 97,
+        94, 95, 96, 96, 97, 99, 101, 104, 107, 110, 113, 117, 121, 125, 130,
+        134, 140, 140, 140, 140, 67, 71, 75, 80, 86, 91, 96, 102, 109, 106, 104,
+        102, 100, 101, 101, 102, 102, 104, 106, 109, 111, 114, 118, 121, 125,
+        129, 134, 138, 143, 143, 143, 143, 79, 82, 85, 89, 92, 97, 102, 108,
+        114, 112, 110, 109, 107, 107, 107, 108, 108, 110, 112, 114, 116, 119,
+        122, 126, 129, 133, 138, 142, 147, 147, 147, 147, 98, 98, 99, 99, 100,
+        104, 109, 114, 119, 118, 117, 116, 115, 115, 115, 114, 114, 116, 118,
+        119, 121, 124, 127, 130, 134, 138, 142, 146, 151, 151, 151, 151, 99, 99,
+        98, 97, 97, 101, 106, 112, 118, 118, 118, 119, 119, 119, 119, 120, 120,
+        122, 123, 125, 127, 130, 133, 136, 139, 143, 147, 151, 156, 156, 156,
+        156, 101, 99, 97, 96, 94, 99, 104, 110, 117, 118, 120, 121, 123, 124,
+        124, 125, 126, 128, 129, 131, 133, 136, 139, 142, 145, 149, 153, 157,
+        161, 161, 161, 161, 103, 100, 97, 94, 92, 97, 102, 109, 116, 119, 121,
+        124, 127, 129, 130, 131, 133, 134, 136, 138, 140, 143, 145, 148, 151,
+        155, 159, 163, 167, 167, 167, 167, 104, 100, 96, 93, 89, 94, 100, 107,
+        115, 119, 123, 127, 132, 134, 136, 138, 140, 142, 144, 146, 147, 150,
+        153, 155, 158, 162, 165, 169, 173, 173, 173, 173, 106, 102, 97, 94, 90,
+        95, 101, 107, 115, 119, 124, 129, 134, 137, 139, 142, 145, 147, 149,
+        151, 153, 156, 159, 162, 164, 168, 172, 175, 179, 179, 179, 179, 108,
+        103, 99, 94, 91, 96, 101, 107, 115, 119, 124, 130, 136, 139, 142, 146,
+        149, 152, 154, 157, 160, 162, 165, 168, 171, 175, 178, 182, 186, 186,
+        186, 186, 110, 105, 100, 95, 91, 96, 102, 108, 114, 120, 125, 131, 138,
+        142, 146, 150, 154, 157, 160, 163, 166, 169, 172, 175, 178, 182, 185,
+        189, 193, 193, 193, 193, 113, 107, 101, 97, 92, 97, 102, 108, 114, 120,
+        126, 133, 140, 145, 149, 154, 160, 163, 167, 170, 174, 177, 180, 183,
+        186, 190, 193, 197, 201, 201, 201, 201, 116, 110, 104, 99, 95, 99, 104,
+        110, 116, 122, 128, 134, 142, 147, 152, 157, 163, 167, 171, 175, 179,
+        182, 185, 189, 192, 196, 199, 203, 207, 207, 207, 207, 120, 113, 107,
+        102, 97, 101, 106, 112, 118, 123, 129, 136, 144, 149, 154, 160, 167,
+        171, 175, 179, 184, 187, 191, 195, 199, 202, 206, 210, 214, 214, 214,
+        214, 124, 117, 110, 105, 100, 104, 109, 114, 119, 125, 131, 138, 146,
+        151, 157, 163, 170, 175, 179, 184, 190, 193, 197, 201, 206, 209, 213,
+        217, 221, 221, 221, 221, 128, 121, 114, 108, 103, 107, 111, 116, 121,
+        127, 133, 140, 147, 153, 160, 166, 174, 179, 184, 190, 195, 200, 204,
+        208, 213, 217, 221, 225, 229, 229, 229, 229, 133, 125, 118, 112, 106,
+        110, 114, 119, 124, 130, 136, 143, 150, 156, 162, 169, 177, 182, 187,
+        193, 200, 204, 209, 213, 218, 222, 227, 231, 235, 235, 235, 235, 137,
+        129, 122, 115, 110, 113, 118, 122, 127, 133, 139, 145, 153, 159, 165,
+        172, 180, 185, 191, 197, 204, 209, 213, 219, 224, 228, 233, 237, 242,
+        242, 242, 242, 143, 134, 126, 120, 113, 117, 121, 126, 130, 136, 142,
+        148, 155, 162, 168, 175, 183, 189, 195, 201, 208, 213, 219, 224, 230,
+        234, 239, 244, 249, 249, 249, 249, 148, 139, 131, 124, 118, 121, 125,
+        129, 134, 139, 145, 151, 158, 164, 171, 178, 186, 192, 199, 206, 213,
+        218, 224, 230, 236, 241, 246, 251, 256, 256, 256, 256, 153, 144, 136,
+        128, 122, 125, 129, 133, 138, 143, 149, 155, 162, 168, 175, 182, 190,
+        196, 202, 209, 217, 222, 228, 234, 241, 246, 251, 256, 262, 262, 262,
+        262, 159, 149, 141, 133, 126, 130, 134, 138, 142, 147, 153, 159, 165,
+        172, 178, 185, 193, 199, 206, 213, 221, 227, 233, 239, 246, 251, 256,
+        262, 267, 267, 267, 267, 165, 155, 146, 138, 131, 134, 138, 142, 146,
+        151, 157, 163, 169, 175, 182, 189, 197, 203, 210, 217, 225, 231, 237,
+        244, 251, 256, 262, 267, 273, 273, 273, 273, 172, 161, 152, 144, 136,
+        140, 143, 147, 151, 156, 161, 167, 173, 179, 186, 193, 201, 207, 214,
+        221, 229, 235, 242, 249, 256, 262, 267, 273, 280, 280, 280, 280, 172,
+        161, 152, 144, 136, 140, 143, 147, 151, 156, 161, 167, 173, 179, 186,
+        193, 201, 207, 214, 221, 229, 235, 242, 249, 256, 262, 267, 273, 280,
+        280, 280, 280, 172, 161, 152, 144, 136, 140, 143, 147, 151, 156, 161,
+        167, 173, 179, 186, 193, 201, 207, 214, 221, 229, 235, 242, 249, 256,
+        262, 267, 273, 280, 280, 280, 280, 172, 161, 152, 144, 136, 140, 143,
+        147, 151, 156, 161, 167, 173, 179, 186, 193, 201, 207, 214, 221, 229,
+        235, 242, 249, 256, 262, 267, 273, 280, 280, 280, 280 },
+      { /* Intra matrices */
+        /* Size 4 */
+        23, 40, 44, 59, 40, 50, 54, 64, 44, 54, 74, 89, 59, 64, 89, 114,
+        /* Size 8 */
+        25, 20, 39, 42, 46, 52, 61, 72, 20, 30, 40, 36, 37, 41, 48, 56, 39, 40,
+        49, 47, 46, 49, 55, 62, 42, 36, 47, 54, 58, 61, 66, 73, 46, 37, 46, 58,
+        67, 73, 79, 85, 52, 41, 49, 61, 73, 83, 91, 99, 61, 48, 55, 66, 79, 91,
+        103, 113, 72, 56, 62, 73, 85, 99, 113, 125,
+        /* Size 16 */
+        24, 22, 19, 26, 38, 39, 41, 42, 44, 47, 51, 55, 59, 64, 69, 69, 22, 22,
+        23, 29, 38, 38, 37, 38, 39, 42, 45, 48, 52, 56, 61, 61, 19, 23, 29, 33,
+        39, 37, 34, 35, 36, 38, 40, 43, 46, 50, 54, 54, 26, 29, 33, 37, 42, 41,
+        39, 39, 40, 42, 43, 46, 49, 53, 57, 57, 38, 38, 39, 42, 47, 46, 45, 45,
+        45, 46, 48, 50, 53, 56, 60, 60, 39, 38, 37, 41, 46, 47, 48, 49, 50, 51,
+        53, 55, 58, 61, 65, 65, 41, 37, 34, 39, 45, 48, 52, 54, 56, 57, 59, 61,
+        64, 67, 70, 70, 42, 38, 35, 39, 45, 49, 54, 57, 60, 62, 64, 67, 69, 72,
+        76, 76, 44, 39, 36, 40, 45, 50, 56, 60, 64, 67, 70, 73, 76, 79, 82, 82,
+        47, 42, 38, 42, 46, 51, 57, 62, 67, 71, 75, 78, 82, 85, 89, 89, 51, 45,
+        40, 43, 48, 53, 59, 64, 70, 75, 80, 84, 88, 92, 96, 96, 55, 48, 43, 46,
+        50, 55, 61, 67, 73, 78, 84, 88, 93, 97, 102, 102, 59, 52, 46, 49, 53,
+        58, 64, 69, 76, 82, 88, 93, 99, 104, 109, 109, 64, 56, 50, 53, 56, 61,
+        67, 72, 79, 85, 92, 97, 104, 109, 114, 114, 69, 61, 54, 57, 60, 65, 70,
+        76, 82, 89, 96, 102, 109, 114, 120, 120, 69, 61, 54, 57, 60, 65, 70, 76,
+        82, 89, 96, 102, 109, 114, 120, 120,
+        /* Size 32 */
+        24, 22, 21, 20, 19, 22, 25, 30, 37, 38, 39, 39, 40, 41, 42, 42, 43, 45,
+        46, 48, 50, 51, 54, 56, 58, 60, 63, 65, 68, 68, 68, 68, 22, 22, 22, 21,
+        21, 23, 27, 31, 37, 38, 38, 38, 38, 39, 40, 40, 41, 42, 44, 45, 47, 48,
+        50, 52, 54, 56, 59, 61, 64, 64, 64, 64, 21, 22, 22, 22, 23, 25, 28, 32,
+        38, 37, 37, 37, 37, 37, 38, 38, 39, 40, 41, 42, 44, 45, 47, 49, 51, 53,
+        55, 57, 60, 60, 60, 60, 20, 21, 22, 24, 25, 28, 30, 34, 38, 37, 36, 36,
+        35, 36, 36, 36, 37, 38, 39, 40, 41, 43, 44, 46, 48, 50, 52, 54, 56, 56,
+        56, 56, 19, 21, 23, 25, 29, 30, 33, 35, 38, 37, 36, 35, 34, 34, 34, 35,
+        35, 36, 37, 38, 39, 41, 42, 44, 45, 47, 49, 51, 53, 53, 53, 53, 22, 23,
+        25, 28, 30, 32, 34, 37, 40, 39, 38, 37, 36, 36, 36, 37, 37, 38, 39, 40,
+        41, 42, 44, 45, 47, 49, 50, 52, 54, 54, 54, 54, 25, 27, 28, 30, 33, 34,
+        37, 39, 42, 41, 40, 39, 38, 38, 39, 39, 39, 40, 41, 42, 43, 44, 45, 47,
+        48, 50, 52, 54, 56, 56, 56, 56, 30, 31, 32, 34, 35, 37, 39, 41, 44, 43,
+        42, 42, 41, 41, 41, 41, 41, 42, 43, 44, 45, 46, 47, 49, 50, 52, 54, 55,
+        57, 57, 57, 57, 37, 37, 38, 38, 38, 40, 42, 44, 46, 46, 45, 45, 44, 44,
+        44, 44, 44, 45, 45, 46, 47, 48, 49, 51, 52, 54, 55, 57, 59, 59, 59, 59,
+        38, 38, 37, 37, 37, 39, 41, 43, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47,
+        48, 48, 49, 50, 52, 53, 54, 56, 58, 59, 61, 61, 61, 61, 39, 38, 37, 36,
+        36, 38, 40, 42, 45, 46, 46, 47, 48, 48, 48, 48, 49, 49, 50, 51, 52, 53,
+        54, 55, 57, 58, 60, 62, 64, 64, 64, 64, 39, 38, 37, 36, 35, 37, 39, 42,
+        45, 46, 47, 48, 49, 50, 50, 51, 52, 52, 53, 54, 55, 56, 57, 58, 59, 61,
+        63, 64, 66, 66, 66, 66, 40, 38, 37, 35, 34, 36, 38, 41, 44, 46, 48, 49,
+        51, 52, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61, 62, 64, 65, 67, 69, 69,
+        69, 69, 41, 39, 37, 36, 34, 36, 38, 41, 44, 46, 48, 50, 52, 53, 54, 55,
+        57, 57, 58, 59, 60, 61, 63, 64, 65, 67, 68, 70, 71, 71, 71, 71, 42, 40,
+        38, 36, 34, 36, 39, 41, 44, 46, 48, 50, 53, 54, 56, 57, 59, 60, 61, 62,
+        63, 64, 65, 67, 68, 69, 71, 73, 74, 74, 74, 74, 42, 40, 38, 36, 35, 37,
+        39, 41, 44, 46, 48, 51, 54, 55, 57, 59, 61, 62, 63, 64, 66, 67, 68, 70,
+        71, 73, 74, 76, 78, 78, 78, 78, 43, 41, 39, 37, 35, 37, 39, 41, 44, 46,
+        49, 52, 55, 57, 59, 61, 63, 64, 66, 67, 69, 70, 72, 73, 75, 76, 78, 79,
+        81, 81, 81, 81, 45, 42, 40, 38, 36, 38, 40, 42, 45, 47, 49, 52, 55, 57,
+        60, 62, 64, 66, 68, 69, 71, 73, 74, 76, 77, 79, 80, 82, 84, 84, 84, 84,
+        46, 44, 41, 39, 37, 39, 41, 43, 45, 48, 50, 53, 56, 58, 61, 63, 66, 68,
+        70, 71, 74, 75, 77, 78, 80, 82, 83, 85, 87, 87, 87, 87, 48, 45, 42, 40,
+        38, 40, 42, 44, 46, 48, 51, 54, 57, 59, 62, 64, 67, 69, 71, 74, 76, 78,
+        79, 81, 83, 85, 87, 89, 90, 90, 90, 90, 50, 47, 44, 41, 39, 41, 43, 45,
+        47, 49, 52, 55, 58, 60, 63, 66, 69, 71, 74, 76, 79, 80, 82, 84, 87, 88,
+        90, 92, 94, 94, 94, 94, 51, 48, 45, 43, 41, 42, 44, 46, 48, 50, 53, 56,
+        59, 61, 64, 67, 70, 73, 75, 78, 80, 82, 85, 87, 89, 91, 93, 95, 97, 97,
+        97, 97, 54, 50, 47, 44, 42, 44, 45, 47, 49, 52, 54, 57, 60, 63, 65, 68,
+        72, 74, 77, 79, 82, 85, 87, 89, 92, 94, 96, 98, 100, 100, 100, 100, 56,
+        52, 49, 46, 44, 45, 47, 49, 51, 53, 55, 58, 61, 64, 67, 70, 73, 76, 78,
+        81, 84, 87, 89, 92, 94, 96, 99, 101, 103, 103, 103, 103, 58, 54, 51, 48,
+        45, 47, 48, 50, 52, 54, 57, 59, 62, 65, 68, 71, 75, 77, 80, 83, 87, 89,
+        92, 94, 97, 99, 102, 104, 107, 107, 107, 107, 60, 56, 53, 50, 47, 49,
+        50, 52, 54, 56, 58, 61, 64, 67, 69, 73, 76, 79, 82, 85, 88, 91, 94, 96,
+        99, 102, 104, 107, 109, 109, 109, 109, 63, 59, 55, 52, 49, 50, 52, 54,
+        55, 58, 60, 63, 65, 68, 71, 74, 78, 80, 83, 87, 90, 93, 96, 99, 102,
+        104, 107, 109, 112, 112, 112, 112, 65, 61, 57, 54, 51, 52, 54, 55, 57,
+        59, 62, 64, 67, 70, 73, 76, 79, 82, 85, 89, 92, 95, 98, 101, 104, 107,
+        109, 112, 115, 115, 115, 115, 68, 64, 60, 56, 53, 54, 56, 57, 59, 61,
+        64, 66, 69, 71, 74, 78, 81, 84, 87, 90, 94, 97, 100, 103, 107, 109, 112,
+        115, 118, 118, 118, 118, 68, 64, 60, 56, 53, 54, 56, 57, 59, 61, 64, 66,
+        69, 71, 74, 78, 81, 84, 87, 90, 94, 97, 100, 103, 107, 109, 112, 115,
+        118, 118, 118, 118, 68, 64, 60, 56, 53, 54, 56, 57, 59, 61, 64, 66, 69,
+        71, 74, 78, 81, 84, 87, 90, 94, 97, 100, 103, 107, 109, 112, 115, 118,
+        118, 118, 118, 68, 64, 60, 56, 53, 54, 56, 57, 59, 61, 64, 66, 69, 71,
+        74, 78, 81, 84, 87, 90, 94, 97, 100, 103, 107, 109, 112, 115, 118, 118,
+        118, 118 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 70, 120, 197, 70, 109, 156, 218, 120, 156, 229, 287, 197, 218, 287,
+        344,
+        /* Size 8 */
+        64, 47, 51, 69, 94, 126, 161, 197, 47, 55, 52, 62, 80, 105, 135, 167,
+        51, 52, 75, 88, 105, 127, 154, 183, 69, 62, 88, 115, 136, 157, 181, 207,
+        94, 80, 105, 136, 165, 189, 212, 234, 126, 105, 127, 157, 189, 216, 240,
+        261, 161, 135, 154, 181, 212, 240, 264, 284, 197, 167, 183, 207, 234,
+        261, 284, 303,
+        /* Size 16 */
+        64, 54, 47, 49, 51, 59, 69, 80, 94, 108, 126, 141, 161, 177, 197, 197,
+        54, 53, 51, 51, 52, 58, 65, 74, 87, 99, 115, 129, 147, 162, 181, 181,
+        47, 51, 55, 53, 52, 57, 62, 70, 80, 91, 105, 118, 135, 149, 167, 167,
+        49, 51, 53, 57, 62, 67, 73, 81, 91, 102, 115, 128, 144, 158, 175, 175,
+        51, 52, 52, 62, 75, 81, 88, 96, 105, 115, 127, 139, 154, 167, 183, 183,
+        59, 58, 57, 67, 81, 89, 100, 108, 118, 128, 140, 152, 166, 179, 195,
+        195, 69, 65, 62, 73, 88, 100, 115, 124, 136, 146, 157, 168, 181, 193,
+        207, 207, 80, 74, 70, 81, 96, 108, 124, 136, 149, 159, 172, 183, 195,
+        207, 220, 220, 94, 87, 80, 91, 105, 118, 136, 149, 165, 176, 189, 200,
+        212, 222, 234, 234, 108, 99, 91, 102, 115, 128, 146, 159, 176, 188, 202,
+        213, 225, 235, 247, 247, 126, 115, 105, 115, 127, 140, 157, 172, 189,
+        202, 216, 228, 240, 250, 261, 261, 141, 129, 118, 128, 139, 152, 168,
+        183, 200, 213, 228, 239, 251, 261, 272, 272, 161, 147, 135, 144, 154,
+        166, 181, 195, 212, 225, 240, 251, 264, 273, 284, 284, 177, 162, 149,
+        158, 167, 179, 193, 207, 222, 235, 250, 261, 273, 283, 293, 293, 197,
+        181, 167, 175, 183, 195, 207, 220, 234, 247, 261, 272, 284, 293, 303,
+        303, 197, 181, 167, 175, 183, 195, 207, 220, 234, 247, 261, 272, 284,
+        293, 303, 303,
+        /* Size 32 */
+        64, 59, 54, 51, 47, 48, 49, 50, 51, 55, 59, 64, 69, 74, 80, 86, 94, 101,
+        108, 116, 126, 133, 141, 150, 161, 169, 177, 186, 197, 197, 197, 197,
+        59, 56, 54, 51, 49, 50, 50, 51, 52, 55, 58, 62, 67, 72, 77, 83, 90, 96,
+        103, 111, 120, 127, 135, 143, 153, 161, 169, 178, 189, 189, 189, 189,
+        54, 54, 53, 52, 51, 51, 51, 52, 52, 55, 58, 61, 65, 69, 74, 80, 87, 92,
+        99, 106, 115, 121, 129, 137, 147, 154, 162, 171, 181, 181, 181, 181, 51,
+        51, 52, 52, 53, 53, 52, 52, 52, 55, 57, 60, 63, 67, 72, 77, 83, 89, 95,
+        102, 110, 116, 123, 131, 141, 148, 155, 164, 174, 174, 174, 174, 47, 49,
+        51, 53, 55, 54, 53, 53, 52, 54, 57, 59, 62, 66, 70, 75, 80, 85, 91, 98,
+        105, 111, 118, 126, 135, 142, 149, 158, 167, 167, 167, 167, 48, 50, 51,
+        53, 54, 55, 55, 56, 57, 59, 61, 64, 67, 71, 75, 80, 85, 90, 96, 102,
+        110, 116, 123, 130, 139, 146, 153, 162, 171, 171, 171, 171, 49, 50, 51,
+        52, 53, 55, 57, 59, 62, 64, 67, 70, 73, 77, 81, 86, 91, 96, 102, 108,
+        115, 121, 128, 135, 144, 150, 158, 166, 175, 175, 175, 175, 50, 51, 52,
+        52, 53, 56, 59, 63, 68, 70, 73, 76, 80, 84, 88, 92, 97, 102, 108, 114,
+        121, 127, 133, 140, 148, 155, 162, 170, 179, 179, 179, 179, 51, 52, 52,
+        52, 52, 57, 62, 68, 75, 78, 81, 84, 88, 92, 96, 100, 105, 109, 115, 120,
+        127, 133, 139, 146, 154, 160, 167, 175, 183, 183, 183, 183, 55, 55, 55,
+        55, 54, 59, 64, 70, 78, 81, 85, 89, 94, 98, 102, 106, 111, 116, 121,
+        127, 133, 139, 145, 152, 160, 166, 173, 181, 189, 189, 189, 189, 59, 58,
+        58, 57, 57, 61, 67, 73, 81, 85, 89, 94, 100, 104, 108, 113, 118, 123,
+        128, 134, 140, 146, 152, 159, 166, 173, 179, 187, 195, 195, 195, 195,
+        64, 62, 61, 60, 59, 64, 70, 76, 84, 89, 94, 100, 107, 111, 116, 121,
+        126, 131, 137, 142, 148, 154, 160, 166, 173, 180, 186, 193, 201, 201,
+        201, 201, 69, 67, 65, 63, 62, 67, 73, 80, 88, 94, 100, 107, 115, 119,
+        124, 130, 136, 141, 146, 151, 157, 163, 168, 175, 181, 187, 193, 200,
+        207, 207, 207, 207, 74, 72, 69, 67, 66, 71, 77, 84, 92, 98, 104, 111,
+        119, 124, 130, 136, 142, 147, 152, 158, 164, 170, 175, 181, 188, 194,
+        200, 206, 213, 213, 213, 213, 80, 77, 74, 72, 70, 75, 81, 88, 96, 102,
+        108, 116, 124, 130, 136, 142, 149, 154, 159, 165, 172, 177, 183, 189,
+        195, 201, 207, 213, 220, 220, 220, 220, 86, 83, 80, 77, 75, 80, 86, 92,
+        100, 106, 113, 121, 130, 136, 142, 149, 156, 162, 167, 173, 180, 185,
+        191, 197, 203, 209, 214, 220, 227, 227, 227, 227, 94, 90, 87, 83, 80,
+        85, 91, 97, 105, 111, 118, 126, 136, 142, 149, 156, 165, 170, 176, 182,
+        189, 194, 200, 205, 212, 217, 222, 228, 234, 234, 234, 234, 101, 96, 92,
+        89, 85, 90, 96, 102, 109, 116, 123, 131, 141, 147, 154, 162, 170, 176,
+        182, 188, 195, 200, 206, 212, 218, 223, 229, 234, 240, 240, 240, 240,
+        108, 103, 99, 95, 91, 96, 102, 108, 115, 121, 128, 137, 146, 152, 159,
+        167, 176, 182, 188, 195, 202, 207, 213, 219, 225, 230, 235, 241, 247,
+        247, 247, 247, 116, 111, 106, 102, 98, 102, 108, 114, 120, 127, 134,
+        142, 151, 158, 165, 173, 182, 188, 195, 201, 209, 214, 220, 226, 232,
+        237, 242, 248, 254, 254, 254, 254, 126, 120, 115, 110, 105, 110, 115,
+        121, 127, 133, 140, 148, 157, 164, 172, 180, 189, 195, 202, 209, 216,
+        222, 228, 234, 240, 245, 250, 255, 261, 261, 261, 261, 133, 127, 121,
+        116, 111, 116, 121, 127, 133, 139, 146, 154, 163, 170, 177, 185, 194,
+        200, 207, 214, 222, 227, 233, 239, 245, 250, 255, 261, 266, 266, 266,
+        266, 141, 135, 129, 123, 118, 123, 128, 133, 139, 145, 152, 160, 168,
+        175, 183, 191, 200, 206, 213, 220, 228, 233, 239, 245, 251, 256, 261,
+        266, 272, 272, 272, 272, 150, 143, 137, 131, 126, 130, 135, 140, 146,
+        152, 159, 166, 175, 181, 189, 197, 205, 212, 219, 226, 234, 239, 245,
+        251, 257, 262, 267, 272, 278, 278, 278, 278, 161, 153, 147, 141, 135,
+        139, 144, 148, 154, 160, 166, 173, 181, 188, 195, 203, 212, 218, 225,
+        232, 240, 245, 251, 257, 264, 268, 273, 278, 284, 284, 284, 284, 169,
+        161, 154, 148, 142, 146, 150, 155, 160, 166, 173, 180, 187, 194, 201,
+        209, 217, 223, 230, 237, 245, 250, 256, 262, 268, 273, 278, 283, 288,
+        288, 288, 288, 177, 169, 162, 155, 149, 153, 158, 162, 167, 173, 179,
+        186, 193, 200, 207, 214, 222, 229, 235, 242, 250, 255, 261, 267, 273,
+        278, 283, 288, 293, 293, 293, 293, 186, 178, 171, 164, 158, 162, 166,
+        170, 175, 181, 187, 193, 200, 206, 213, 220, 228, 234, 241, 248, 255,
+        261, 266, 272, 278, 283, 288, 293, 298, 298, 298, 298, 197, 189, 181,
+        174, 167, 171, 175, 179, 183, 189, 195, 201, 207, 213, 220, 227, 234,
+        240, 247, 254, 261, 266, 272, 278, 284, 288, 293, 298, 303, 303, 303,
+        303, 197, 189, 181, 174, 167, 171, 175, 179, 183, 189, 195, 201, 207,
+        213, 220, 227, 234, 240, 247, 254, 261, 266, 272, 278, 284, 288, 293,
+        298, 303, 303, 303, 303, 197, 189, 181, 174, 167, 171, 175, 179, 183,
+        189, 195, 201, 207, 213, 220, 227, 234, 240, 247, 254, 261, 266, 272,
+        278, 284, 288, 293, 298, 303, 303, 303, 303, 197, 189, 181, 174, 167,
+        171, 175, 179, 183, 189, 195, 201, 207, 213, 220, 227, 234, 240, 247,
+        254, 261, 266, 272, 278, 284, 288, 293, 298, 303, 303, 303, 303 },
+      { /* Intra matrices */
+        /* Size 4 */
+        19, 21, 37, 63, 21, 33, 49, 70, 37, 49, 74, 96, 63, 70, 96, 119,
+        /* Size 8 */
+        23, 17, 18, 25, 34, 47, 61, 77, 17, 19, 18, 22, 29, 38, 50, 64, 18, 18,
+        27, 32, 38, 47, 58, 71, 25, 22, 32, 42, 51, 60, 70, 81, 34, 29, 38, 51,
+        63, 73, 83, 94, 47, 38, 47, 60, 73, 85, 96, 106, 61, 50, 58, 70, 83, 96,
+        108, 118, 77, 64, 71, 81, 94, 106, 118, 127,
+        /* Size 16 */
+        22, 18, 16, 17, 17, 20, 23, 27, 33, 38, 45, 51, 58, 65, 73, 73, 18, 18,
+        17, 17, 18, 20, 22, 25, 30, 34, 40, 46, 53, 59, 67, 67, 16, 17, 19, 18,
+        18, 19, 21, 24, 28, 32, 37, 42, 48, 54, 61, 61, 17, 17, 18, 19, 21, 23,
+        25, 28, 31, 35, 40, 45, 52, 57, 64, 64, 17, 18, 18, 21, 26, 28, 31, 33,
+        37, 40, 45, 50, 55, 61, 68, 68, 20, 20, 19, 23, 28, 31, 35, 38, 42, 46,
+        50, 55, 61, 66, 72, 72, 23, 22, 21, 25, 31, 35, 40, 44, 48, 52, 57, 61,
+        67, 72, 78, 78, 27, 25, 24, 28, 33, 38, 44, 48, 54, 58, 63, 67, 73, 78,
+        83, 83, 33, 30, 28, 31, 37, 42, 48, 54, 60, 64, 70, 74, 80, 84, 90, 90,
+        38, 34, 32, 35, 40, 46, 52, 58, 64, 69, 75, 80, 85, 90, 95, 95, 45, 40,
+        37, 40, 45, 50, 57, 63, 70, 75, 82, 87, 92, 97, 102, 102, 51, 46, 42,
+        45, 50, 55, 61, 67, 74, 80, 87, 92, 97, 102, 107, 107, 58, 53, 48, 52,
+        55, 61, 67, 73, 80, 85, 92, 97, 103, 107, 112, 112, 65, 59, 54, 57, 61,
+        66, 72, 78, 84, 90, 97, 102, 107, 112, 117, 117, 73, 67, 61, 64, 68, 72,
+        78, 83, 90, 95, 102, 107, 112, 117, 122, 122, 73, 67, 61, 64, 68, 72,
+        78, 83, 90, 95, 102, 107, 112, 117, 122, 122,
+        /* Size 32 */
+        21, 19, 18, 17, 16, 16, 16, 17, 17, 18, 20, 21, 23, 25, 27, 29, 32, 34,
+        37, 40, 44, 46, 49, 53, 57, 60, 63, 67, 72, 72, 72, 72, 19, 19, 18, 17,
+        16, 16, 17, 17, 17, 18, 19, 21, 22, 24, 26, 28, 31, 33, 35, 38, 41, 44,
+        47, 50, 54, 57, 60, 64, 68, 68, 68, 68, 18, 18, 17, 17, 17, 17, 17, 17,
+        17, 18, 19, 20, 22, 23, 25, 27, 29, 31, 34, 36, 39, 42, 45, 48, 52, 54,
+        57, 61, 65, 65, 65, 65, 17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 19, 20,
+        21, 22, 24, 26, 28, 30, 32, 35, 38, 40, 43, 46, 49, 52, 55, 58, 62, 62,
+        62, 62, 16, 16, 17, 17, 18, 18, 18, 17, 17, 18, 19, 20, 21, 22, 23, 25,
+        27, 29, 31, 33, 36, 38, 41, 44, 47, 50, 53, 56, 60, 60, 60, 60, 16, 16,
+        17, 17, 18, 18, 18, 18, 19, 19, 20, 21, 22, 24, 25, 27, 29, 31, 33, 35,
+        38, 40, 42, 45, 49, 51, 54, 57, 61, 61, 61, 61, 16, 17, 17, 17, 18, 18,
+        19, 20, 20, 21, 22, 23, 24, 26, 27, 29, 31, 33, 35, 37, 40, 42, 44, 47,
+        50, 53, 56, 59, 63, 63, 63, 63, 17, 17, 17, 17, 17, 18, 20, 21, 22, 23,
+        24, 26, 27, 28, 30, 31, 33, 35, 37, 39, 42, 44, 46, 49, 52, 55, 58, 61,
+        64, 64, 64, 64, 17, 17, 17, 17, 17, 19, 20, 22, 25, 26, 27, 28, 30, 31,
+        32, 34, 36, 37, 39, 42, 44, 46, 49, 51, 54, 57, 60, 63, 66, 66, 66, 66,
+        18, 18, 18, 18, 18, 19, 21, 23, 26, 27, 29, 30, 32, 33, 35, 36, 38, 40,
+        42, 44, 46, 49, 51, 54, 57, 59, 62, 65, 68, 68, 68, 68, 20, 19, 19, 19,
+        19, 20, 22, 24, 27, 29, 30, 32, 34, 35, 37, 39, 41, 43, 45, 47, 49, 51,
+        54, 56, 59, 62, 64, 67, 71, 71, 71, 71, 21, 21, 20, 20, 20, 21, 23, 26,
+        28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 57, 59, 62, 64,
+        67, 70, 73, 73, 73, 73, 23, 22, 22, 21, 21, 22, 24, 27, 30, 32, 34, 36,
+        39, 41, 43, 45, 47, 49, 51, 53, 56, 58, 60, 62, 65, 68, 70, 73, 76, 76,
+        76, 76, 25, 24, 23, 22, 22, 24, 26, 28, 31, 33, 35, 38, 41, 43, 45, 47,
+        50, 52, 54, 56, 58, 60, 63, 65, 68, 70, 73, 76, 78, 78, 78, 78, 27, 26,
+        25, 24, 23, 25, 27, 30, 32, 35, 37, 40, 43, 45, 47, 50, 52, 54, 56, 59,
+        61, 63, 66, 68, 71, 73, 76, 78, 81, 81, 81, 81, 29, 28, 27, 26, 25, 27,
+        29, 31, 34, 36, 39, 42, 45, 47, 50, 52, 55, 57, 60, 62, 65, 67, 69, 72,
+        74, 76, 79, 82, 84, 84, 84, 84, 32, 31, 29, 28, 27, 29, 31, 33, 36, 38,
+        41, 44, 47, 50, 52, 55, 58, 61, 63, 66, 68, 70, 73, 75, 78, 80, 82, 85,
+        88, 88, 88, 88, 34, 33, 31, 30, 29, 31, 33, 35, 37, 40, 43, 46, 49, 52,
+        54, 57, 61, 63, 65, 68, 71, 73, 75, 78, 80, 83, 85, 88, 90, 90, 90, 90,
+        37, 35, 34, 32, 31, 33, 35, 37, 39, 42, 45, 48, 51, 54, 56, 60, 63, 65,
+        68, 71, 74, 76, 78, 81, 83, 86, 88, 90, 93, 93, 93, 93, 40, 38, 36, 35,
+        33, 35, 37, 39, 42, 44, 47, 50, 53, 56, 59, 62, 66, 68, 71, 73, 77, 79,
+        81, 84, 87, 89, 91, 93, 96, 96, 96, 96, 44, 41, 39, 38, 36, 38, 40, 42,
+        44, 46, 49, 52, 56, 58, 61, 65, 68, 71, 74, 77, 80, 82, 85, 87, 90, 92,
+        94, 97, 99, 99, 99, 99, 46, 44, 42, 40, 38, 40, 42, 44, 46, 49, 51, 54,
+        58, 60, 63, 67, 70, 73, 76, 79, 82, 84, 87, 90, 92, 95, 97, 99, 102,
+        102, 102, 102, 49, 47, 45, 43, 41, 42, 44, 46, 49, 51, 54, 57, 60, 63,
+        66, 69, 73, 75, 78, 81, 85, 87, 89, 92, 95, 97, 99, 102, 104, 104, 104,
+        104, 53, 50, 48, 46, 44, 45, 47, 49, 51, 54, 56, 59, 62, 65, 68, 72, 75,
+        78, 81, 84, 87, 90, 92, 95, 98, 100, 102, 105, 107, 107, 107, 107, 57,
+        54, 52, 49, 47, 49, 50, 52, 54, 57, 59, 62, 65, 68, 71, 74, 78, 80, 83,
+        87, 90, 92, 95, 98, 101, 103, 105, 107, 110, 110, 110, 110, 60, 57, 54,
+        52, 50, 51, 53, 55, 57, 59, 62, 64, 68, 70, 73, 76, 80, 83, 86, 89, 92,
+        95, 97, 100, 103, 105, 107, 110, 112, 112, 112, 112, 63, 60, 57, 55, 53,
+        54, 56, 58, 60, 62, 64, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94, 97, 99,
+        102, 105, 107, 109, 112, 114, 114, 114, 114, 67, 64, 61, 58, 56, 57, 59,
+        61, 63, 65, 67, 70, 73, 76, 78, 82, 85, 88, 90, 93, 97, 99, 102, 105,
+        107, 110, 112, 114, 117, 117, 117, 117, 72, 68, 65, 62, 60, 61, 63, 64,
+        66, 68, 71, 73, 76, 78, 81, 84, 88, 90, 93, 96, 99, 102, 104, 107, 110,
+        112, 114, 117, 119, 119, 119, 119, 72, 68, 65, 62, 60, 61, 63, 64, 66,
+        68, 71, 73, 76, 78, 81, 84, 88, 90, 93, 96, 99, 102, 104, 107, 110, 112,
+        114, 117, 119, 119, 119, 119, 72, 68, 65, 62, 60, 61, 63, 64, 66, 68,
+        71, 73, 76, 78, 81, 84, 88, 90, 93, 96, 99, 102, 104, 107, 110, 112,
+        114, 117, 119, 119, 119, 119, 72, 68, 65, 62, 60, 61, 63, 64, 66, 68,
+        71, 73, 76, 78, 81, 84, 88, 90, 93, 96, 99, 102, 104, 107, 110, 112,
+        114, 117, 119, 119, 119, 119 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 104, 114, 146, 104, 126, 136, 158, 114, 136, 178, 207, 146, 158,
+        207, 250,
+        /* Size 8 */
+        64, 51, 96, 102, 109, 123, 141, 161, 51, 75, 98, 88, 91, 100, 114, 131,
+        96, 98, 116, 111, 111, 117, 128, 143, 102, 88, 111, 127, 134, 140, 150,
+        163, 109, 91, 111, 134, 151, 163, 174, 186, 123, 100, 117, 140, 163,
+        181, 196, 209, 141, 114, 128, 150, 174, 196, 214, 230, 161, 131, 143,
+        163, 186, 209, 230, 248,
+        /* Size 16 */
+        64, 57, 51, 67, 96, 99, 102, 106, 109, 116, 123, 132, 141, 151, 161,
+        161, 57, 59, 61, 75, 97, 96, 94, 97, 99, 105, 111, 118, 126, 135, 144,
+        144, 51, 61, 75, 85, 98, 92, 88, 89, 91, 95, 100, 107, 114, 122, 131,
+        131, 67, 75, 85, 94, 106, 102, 98, 99, 100, 104, 108, 114, 121, 128,
+        137, 137, 96, 97, 98, 106, 116, 113, 111, 111, 111, 114, 117, 123, 128,
+        135, 143, 143, 99, 96, 92, 102, 113, 116, 119, 120, 121, 125, 128, 133,
+        138, 145, 152, 152, 102, 94, 88, 98, 111, 119, 127, 131, 134, 137, 140,
+        145, 150, 156, 163, 163, 106, 97, 89, 99, 111, 120, 131, 136, 142, 146,
+        151, 156, 161, 167, 173, 173, 109, 99, 91, 100, 111, 121, 134, 142, 151,
+        157, 163, 168, 174, 179, 186, 186, 116, 105, 95, 104, 114, 125, 137,
+        146, 157, 164, 172, 178, 184, 190, 196, 196, 123, 111, 100, 108, 117,
+        128, 140, 151, 163, 172, 181, 188, 196, 202, 209, 209, 132, 118, 107,
+        114, 123, 133, 145, 156, 168, 178, 188, 196, 205, 211, 219, 219, 141,
+        126, 114, 121, 128, 138, 150, 161, 174, 184, 196, 205, 214, 222, 230,
+        230, 151, 135, 122, 128, 135, 145, 156, 167, 179, 190, 202, 211, 222,
+        230, 238, 238, 161, 144, 131, 137, 143, 152, 163, 173, 186, 196, 209,
+        219, 230, 238, 248, 248, 161, 144, 131, 137, 143, 152, 163, 173, 186,
+        196, 209, 219, 230, 238, 248, 248,
+        /* Size 32 */
+        64, 60, 57, 54, 51, 58, 67, 79, 96, 97, 99, 100, 102, 104, 106, 107,
+        109, 113, 116, 120, 123, 127, 132, 136, 141, 146, 151, 156, 161, 161,
+        161, 161, 60, 59, 58, 57, 56, 62, 70, 81, 96, 97, 97, 98, 98, 99, 101,
+        102, 104, 107, 110, 113, 117, 120, 124, 129, 133, 137, 142, 147, 152,
+        152, 152, 152, 57, 58, 59, 60, 61, 67, 75, 84, 97, 96, 96, 95, 94, 96,
+        97, 98, 99, 102, 105, 107, 111, 114, 118, 122, 126, 130, 135, 139, 144,
+        144, 144, 144, 54, 57, 60, 63, 67, 73, 79, 87, 97, 96, 94, 92, 91, 92,
+        93, 94, 95, 97, 100, 102, 105, 108, 112, 116, 120, 124, 128, 132, 137,
+        137, 137, 137, 51, 56, 61, 67, 75, 80, 85, 91, 98, 95, 92, 90, 88, 89,
+        89, 90, 91, 93, 95, 98, 100, 103, 107, 110, 114, 118, 122, 126, 131,
+        131, 131, 131, 58, 62, 67, 73, 80, 84, 89, 95, 102, 99, 97, 95, 93, 93,
+        94, 94, 95, 97, 99, 102, 104, 107, 110, 114, 117, 121, 125, 129, 134,
+        134, 134, 134, 67, 70, 75, 79, 85, 89, 94, 100, 106, 104, 102, 100, 98,
+        99, 99, 99, 100, 102, 104, 106, 108, 111, 114, 117, 121, 124, 128, 132,
+        137, 137, 137, 137, 79, 81, 84, 87, 91, 95, 100, 105, 110, 109, 107,
+        106, 104, 105, 105, 105, 105, 107, 109, 111, 112, 115, 118, 121, 125,
+        128, 132, 136, 140, 140, 140, 140, 96, 96, 97, 97, 98, 102, 106, 110,
+        116, 114, 113, 112, 111, 111, 111, 111, 111, 112, 114, 116, 117, 120,
+        123, 125, 128, 132, 135, 139, 143, 143, 143, 143, 97, 97, 96, 96, 95,
+        99, 104, 109, 114, 115, 115, 115, 115, 115, 115, 116, 116, 117, 119,
+        121, 122, 125, 128, 130, 133, 137, 140, 144, 148, 148, 148, 148, 99, 97,
+        96, 94, 92, 97, 102, 107, 113, 115, 116, 117, 119, 119, 120, 121, 121,
+        123, 125, 126, 128, 130, 133, 136, 138, 142, 145, 149, 152, 152, 152,
+        152, 100, 98, 95, 92, 90, 95, 100, 106, 112, 115, 117, 120, 123, 124,
+        125, 126, 127, 129, 131, 132, 134, 136, 139, 141, 144, 147, 150, 154,
+        157, 157, 157, 157, 102, 98, 94, 91, 88, 93, 98, 104, 111, 115, 119,
+        123, 127, 129, 131, 132, 134, 136, 137, 139, 140, 143, 145, 147, 150,
+        153, 156, 159, 163, 163, 163, 163, 104, 99, 96, 92, 89, 93, 99, 105,
+        111, 115, 119, 124, 129, 131, 133, 136, 138, 140, 142, 144, 146, 148,
+        150, 153, 155, 158, 161, 164, 168, 168, 168, 168, 106, 101, 97, 93, 89,
+        94, 99, 105, 111, 115, 120, 125, 131, 133, 136, 139, 142, 144, 146, 149,
+        151, 153, 156, 158, 161, 164, 167, 170, 173, 173, 173, 173, 107, 102,
+        98, 94, 90, 94, 99, 105, 111, 116, 121, 126, 132, 136, 139, 143, 147,
+        149, 151, 154, 157, 159, 162, 164, 167, 170, 173, 176, 179, 179, 179,
+        179, 109, 104, 99, 95, 91, 95, 100, 105, 111, 116, 121, 127, 134, 138,
+        142, 147, 151, 154, 157, 160, 163, 166, 168, 171, 174, 177, 179, 182,
+        186, 186, 186, 186, 113, 107, 102, 97, 93, 97, 102, 107, 112, 117, 123,
+        129, 136, 140, 144, 149, 154, 157, 160, 164, 167, 170, 173, 176, 179,
+        182, 185, 188, 191, 191, 191, 191, 116, 110, 105, 100, 95, 99, 104, 109,
+        114, 119, 125, 131, 137, 142, 146, 151, 157, 160, 164, 168, 172, 175,
+        178, 181, 184, 187, 190, 193, 196, 196, 196, 196, 120, 113, 107, 102,
+        98, 102, 106, 111, 116, 121, 126, 132, 139, 144, 149, 154, 160, 164,
+        168, 172, 176, 180, 183, 186, 190, 193, 196, 199, 202, 202, 202, 202,
+        123, 117, 111, 105, 100, 104, 108, 112, 117, 122, 128, 134, 140, 146,
+        151, 157, 163, 167, 172, 176, 181, 185, 188, 192, 196, 199, 202, 205,
+        209, 209, 209, 209, 127, 120, 114, 108, 103, 107, 111, 115, 120, 125,
+        130, 136, 143, 148, 153, 159, 166, 170, 175, 180, 185, 188, 192, 196,
+        200, 203, 207, 210, 214, 214, 214, 214, 132, 124, 118, 112, 107, 110,
+        114, 118, 123, 128, 133, 139, 145, 150, 156, 162, 168, 173, 178, 183,
+        188, 192, 196, 200, 205, 208, 211, 215, 219, 219, 219, 219, 136, 129,
+        122, 116, 110, 114, 117, 121, 125, 130, 136, 141, 147, 153, 158, 164,
+        171, 176, 181, 186, 192, 196, 200, 205, 209, 213, 216, 220, 224, 224,
+        224, 224, 141, 133, 126, 120, 114, 117, 121, 125, 128, 133, 138, 144,
+        150, 155, 161, 167, 174, 179, 184, 190, 196, 200, 205, 209, 214, 218,
+        222, 226, 230, 230, 230, 230, 146, 137, 130, 124, 118, 121, 124, 128,
+        132, 137, 142, 147, 153, 158, 164, 170, 177, 182, 187, 193, 199, 203,
+        208, 213, 218, 222, 226, 230, 234, 234, 234, 234, 151, 142, 135, 128,
+        122, 125, 128, 132, 135, 140, 145, 150, 156, 161, 167, 173, 179, 185,
+        190, 196, 202, 207, 211, 216, 222, 226, 230, 234, 238, 238, 238, 238,
+        156, 147, 139, 132, 126, 129, 132, 136, 139, 144, 149, 154, 159, 164,
+        170, 176, 182, 188, 193, 199, 205, 210, 215, 220, 226, 230, 234, 238,
+        243, 243, 243, 243, 161, 152, 144, 137, 131, 134, 137, 140, 143, 148,
+        152, 157, 163, 168, 173, 179, 186, 191, 196, 202, 209, 214, 219, 224,
+        230, 234, 238, 243, 248, 248, 248, 248, 161, 152, 144, 137, 131, 134,
+        137, 140, 143, 148, 152, 157, 163, 168, 173, 179, 186, 191, 196, 202,
+        209, 214, 219, 224, 230, 234, 238, 243, 248, 248, 248, 248, 161, 152,
+        144, 137, 131, 134, 137, 140, 143, 148, 152, 157, 163, 168, 173, 179,
+        186, 191, 196, 202, 209, 214, 219, 224, 230, 234, 238, 243, 248, 248,
+        248, 248, 161, 152, 144, 137, 131, 134, 137, 140, 143, 148, 152, 157,
+        163, 168, 173, 179, 186, 191, 196, 202, 209, 214, 219, 224, 230, 234,
+        238, 243, 248, 248, 248, 248 },
+      { /* Intra matrices */
+        /* Size 4 */
+        25, 42, 46, 60, 42, 51, 55, 65, 46, 55, 75, 88, 60, 65, 88, 109,
+        /* Size 8 */
+        27, 22, 41, 44, 48, 54, 63, 72, 22, 32, 42, 38, 39, 43, 50, 58, 41, 42,
+        50, 49, 48, 51, 57, 64, 44, 38, 49, 56, 59, 62, 67, 73, 48, 39, 48, 59,
+        67, 73, 78, 84, 54, 43, 51, 62, 73, 82, 90, 96, 63, 50, 57, 67, 78, 90,
+        99, 107, 72, 58, 64, 73, 84, 96, 107, 117,
+        /* Size 16 */
+        26, 23, 21, 27, 40, 41, 43, 44, 46, 49, 52, 56, 61, 65, 70, 70, 23, 24,
+        25, 31, 40, 40, 39, 40, 42, 44, 47, 50, 54, 58, 62, 62, 21, 25, 31, 35,
+        41, 39, 37, 37, 38, 40, 42, 45, 48, 52, 56, 56, 27, 31, 35, 39, 45, 43,
+        41, 41, 42, 44, 46, 48, 51, 55, 58, 58, 40, 40, 41, 45, 49, 48, 47, 47,
+        47, 48, 50, 52, 55, 58, 62, 62, 41, 40, 39, 43, 48, 49, 50, 51, 52, 53,
+        54, 57, 59, 62, 66, 66, 43, 39, 37, 41, 47, 50, 54, 56, 57, 59, 60, 62,
+        65, 68, 71, 71, 44, 40, 37, 41, 47, 51, 56, 58, 61, 63, 65, 67, 70, 73,
+        76, 76, 46, 42, 38, 42, 47, 52, 57, 61, 65, 68, 71, 73, 76, 79, 82, 82,
+        49, 44, 40, 44, 48, 53, 59, 63, 68, 71, 75, 78, 81, 84, 87, 87, 52, 47,
+        42, 46, 50, 54, 60, 65, 71, 75, 80, 83, 87, 90, 93, 93, 56, 50, 45, 48,
+        52, 57, 62, 67, 73, 78, 83, 87, 91, 95, 98, 98, 61, 54, 48, 51, 55, 59,
+        65, 70, 76, 81, 87, 91, 96, 100, 104, 104, 65, 58, 52, 55, 58, 62, 68,
+        73, 79, 84, 90, 95, 100, 104, 108, 108, 70, 62, 56, 58, 62, 66, 71, 76,
+        82, 87, 93, 98, 104, 108, 113, 113, 70, 62, 56, 58, 62, 66, 71, 76, 82,
+        87, 93, 98, 104, 108, 113, 113,
+        /* Size 32 */
+        26, 24, 23, 22, 21, 23, 27, 32, 39, 40, 41, 41, 42, 43, 44, 44, 45, 47,
+        48, 50, 52, 53, 55, 57, 60, 62, 64, 66, 69, 69, 69, 69, 24, 24, 23, 23,
+        22, 25, 29, 33, 40, 40, 40, 40, 40, 41, 42, 42, 43, 44, 46, 47, 49, 50,
+        52, 54, 56, 58, 60, 62, 65, 65, 65, 65, 23, 23, 24, 24, 25, 27, 30, 34,
+        40, 40, 39, 39, 39, 39, 40, 40, 41, 42, 43, 45, 46, 47, 49, 51, 53, 55,
+        57, 59, 61, 61, 61, 61, 22, 23, 24, 26, 27, 30, 32, 36, 40, 39, 39, 38,
+        37, 38, 38, 39, 39, 40, 41, 42, 43, 45, 46, 48, 50, 52, 54, 56, 58, 58,
+        58, 58, 21, 22, 25, 27, 31, 33, 35, 37, 40, 39, 38, 37, 36, 36, 37, 37,
+        37, 38, 39, 40, 41, 43, 44, 46, 47, 49, 51, 53, 55, 55, 55, 55, 23, 25,
+        27, 30, 33, 34, 37, 39, 42, 41, 40, 39, 38, 38, 39, 39, 39, 40, 41, 42,
+        43, 44, 46, 47, 49, 50, 52, 54, 56, 56, 56, 56, 27, 29, 30, 32, 35, 37,
+        39, 41, 44, 43, 42, 41, 40, 41, 41, 41, 41, 42, 43, 44, 45, 46, 47, 49,
+        50, 52, 54, 56, 58, 58, 58, 58, 32, 33, 34, 36, 37, 39, 41, 43, 46, 45,
+        44, 44, 43, 43, 43, 43, 43, 44, 45, 46, 47, 48, 49, 51, 52, 54, 55, 57,
+        59, 59, 59, 59, 39, 40, 40, 40, 40, 42, 44, 46, 48, 48, 47, 47, 46, 46,
+        46, 46, 46, 47, 47, 48, 49, 50, 51, 52, 54, 55, 57, 59, 61, 61, 61, 61,
+        40, 40, 40, 39, 39, 41, 43, 45, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49,
+        50, 50, 51, 52, 53, 55, 56, 57, 59, 61, 63, 63, 63, 63, 41, 40, 39, 39,
+        38, 40, 42, 44, 47, 48, 48, 49, 50, 50, 50, 50, 51, 51, 52, 53, 54, 55,
+        56, 57, 58, 60, 61, 63, 65, 65, 65, 65, 41, 40, 39, 38, 37, 39, 41, 44,
+        47, 48, 49, 50, 51, 52, 52, 53, 53, 54, 55, 56, 56, 57, 58, 60, 61, 62,
+        64, 65, 67, 67, 67, 67, 42, 40, 39, 37, 36, 38, 40, 43, 46, 48, 50, 51,
+        53, 54, 55, 56, 56, 57, 58, 59, 59, 60, 61, 62, 64, 65, 66, 68, 69, 69,
+        69, 69, 43, 41, 39, 38, 36, 38, 41, 43, 46, 48, 50, 52, 54, 55, 56, 57,
+        58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 72, 72, 72, 72, 44, 42,
+        40, 38, 37, 39, 41, 43, 46, 48, 50, 52, 55, 56, 57, 59, 60, 61, 62, 63,
+        64, 65, 66, 67, 69, 70, 71, 73, 74, 74, 74, 74, 44, 42, 40, 39, 37, 39,
+        41, 43, 46, 48, 50, 53, 56, 57, 59, 60, 62, 63, 64, 66, 67, 68, 69, 70,
+        72, 73, 74, 76, 77, 77, 77, 77, 45, 43, 41, 39, 37, 39, 41, 43, 46, 48,
+        51, 53, 56, 58, 60, 62, 64, 65, 67, 68, 70, 71, 72, 73, 75, 76, 77, 79,
+        80, 80, 80, 80, 47, 44, 42, 40, 38, 40, 42, 44, 47, 49, 51, 54, 57, 59,
+        61, 63, 65, 67, 68, 70, 72, 73, 74, 76, 77, 78, 80, 81, 83, 83, 83, 83,
+        48, 46, 43, 41, 39, 41, 43, 45, 47, 50, 52, 55, 58, 60, 62, 64, 67, 68,
+        70, 72, 74, 75, 77, 78, 80, 81, 83, 84, 86, 86, 86, 86, 50, 47, 45, 42,
+        40, 42, 44, 46, 48, 50, 53, 56, 59, 61, 63, 66, 68, 70, 72, 74, 76, 77,
+        79, 81, 82, 84, 85, 87, 89, 89, 89, 89, 52, 49, 46, 43, 41, 43, 45, 47,
+        49, 51, 54, 56, 59, 62, 64, 67, 70, 72, 74, 76, 78, 80, 82, 83, 85, 87,
+        88, 90, 92, 92, 92, 92, 53, 50, 47, 45, 43, 44, 46, 48, 50, 52, 55, 57,
+        60, 63, 65, 68, 71, 73, 75, 77, 80, 82, 83, 85, 87, 89, 91, 92, 94, 94,
+        94, 94, 55, 52, 49, 46, 44, 46, 47, 49, 51, 53, 56, 58, 61, 64, 66, 69,
+        72, 74, 77, 79, 82, 83, 85, 87, 90, 91, 93, 95, 97, 97, 97, 97, 57, 54,
+        51, 48, 46, 47, 49, 51, 52, 55, 57, 60, 62, 65, 67, 70, 73, 76, 78, 81,
+        83, 85, 87, 90, 92, 94, 95, 97, 99, 99, 99, 99, 60, 56, 53, 50, 47, 49,
+        50, 52, 54, 56, 58, 61, 64, 66, 69, 72, 75, 77, 80, 82, 85, 87, 90, 92,
+        94, 96, 98, 100, 102, 102, 102, 102, 62, 58, 55, 52, 49, 50, 52, 54, 55,
+        57, 60, 62, 65, 67, 70, 73, 76, 78, 81, 84, 87, 89, 91, 94, 96, 98, 100,
+        102, 104, 104, 104, 104, 64, 60, 57, 54, 51, 52, 54, 55, 57, 59, 61, 64,
+        66, 69, 71, 74, 77, 80, 83, 85, 88, 91, 93, 95, 98, 100, 102, 104, 107,
+        107, 107, 107, 66, 62, 59, 56, 53, 54, 56, 57, 59, 61, 63, 65, 68, 70,
+        73, 76, 79, 81, 84, 87, 90, 92, 95, 97, 100, 102, 104, 107, 109, 109,
+        109, 109, 69, 65, 61, 58, 55, 56, 58, 59, 61, 63, 65, 67, 69, 72, 74,
+        77, 80, 83, 86, 89, 92, 94, 97, 99, 102, 104, 107, 109, 111, 111, 111,
+        111, 69, 65, 61, 58, 55, 56, 58, 59, 61, 63, 65, 67, 69, 72, 74, 77, 80,
+        83, 86, 89, 92, 94, 97, 99, 102, 104, 107, 109, 111, 111, 111, 111, 69,
+        65, 61, 58, 55, 56, 58, 59, 61, 63, 65, 67, 69, 72, 74, 77, 80, 83, 86,
+        89, 92, 94, 97, 99, 102, 104, 107, 109, 111, 111, 111, 111, 69, 65, 61,
+        58, 55, 56, 58, 59, 61, 63, 65, 67, 69, 72, 74, 77, 80, 83, 86, 89, 92,
+        94, 97, 99, 102, 104, 107, 109, 111, 111, 111, 111 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 70, 116, 182, 70, 106, 148, 199, 116, 148, 207, 251, 182, 199, 251,
+        292,
+        /* Size 8 */
+        64, 48, 52, 69, 92, 120, 150, 179, 48, 55, 53, 62, 79, 102, 128, 155,
+        52, 53, 74, 87, 101, 121, 144, 168, 69, 62, 87, 110, 129, 147, 166, 186,
+        92, 79, 101, 129, 153, 172, 190, 207, 120, 102, 121, 147, 172, 194, 211,
+        226, 150, 128, 144, 166, 190, 211, 228, 242, 179, 155, 168, 186, 207,
+        226, 242, 255,
+        /* Size 16 */
+        64, 55, 48, 50, 52, 59, 69, 79, 92, 104, 120, 133, 150, 163, 179, 179,
+        55, 53, 51, 52, 52, 58, 65, 74, 85, 96, 110, 123, 138, 151, 166, 166,
+        48, 51, 55, 54, 53, 57, 62, 70, 79, 89, 102, 113, 128, 140, 155, 155,
+        50, 52, 54, 58, 62, 67, 72, 80, 89, 99, 111, 122, 135, 147, 161, 161,
+        52, 52, 53, 62, 74, 80, 87, 93, 101, 110, 121, 131, 144, 155, 168, 168,
+        59, 58, 57, 67, 80, 88, 97, 105, 113, 122, 133, 143, 154, 165, 177, 177,
+        69, 65, 62, 72, 87, 97, 110, 119, 129, 137, 147, 156, 166, 176, 186,
+        186, 79, 74, 70, 80, 93, 105, 119, 128, 140, 149, 158, 167, 177, 186,
+        196, 196, 92, 85, 79, 89, 101, 113, 129, 140, 153, 162, 172, 181, 190,
+        198, 207, 207, 104, 96, 89, 99, 110, 122, 137, 149, 162, 171, 182, 191,
+        200, 208, 216, 216, 120, 110, 102, 111, 121, 133, 147, 158, 172, 182,
+        194, 202, 211, 218, 226, 226, 133, 123, 113, 122, 131, 143, 156, 167,
+        181, 191, 202, 210, 219, 227, 234, 234, 150, 138, 128, 135, 144, 154,
+        166, 177, 190, 200, 211, 219, 228, 235, 242, 242, 163, 151, 140, 147,
+        155, 165, 176, 186, 198, 208, 218, 227, 235, 242, 249, 249, 179, 166,
+        155, 161, 168, 177, 186, 196, 207, 216, 226, 234, 242, 249, 255, 255,
+        179, 166, 155, 161, 168, 177, 186, 196, 207, 216, 226, 234, 242, 249,
+        255, 255,
+        /* Size 32 */
+        64, 59, 55, 51, 48, 49, 50, 51, 52, 55, 59, 64, 69, 73, 79, 85, 92, 98,
+        104, 112, 120, 126, 133, 141, 150, 156, 163, 170, 179, 179, 179, 179,
+        59, 56, 54, 52, 50, 50, 51, 52, 52, 55, 59, 62, 67, 71, 76, 82, 89, 94,
+        100, 107, 115, 121, 128, 135, 144, 150, 156, 164, 172, 172, 172, 172,
+        55, 54, 53, 52, 51, 52, 52, 52, 52, 55, 58, 61, 65, 69, 74, 79, 85, 90,
+        96, 103, 110, 116, 123, 130, 138, 144, 151, 158, 166, 166, 166, 166, 51,
+        52, 52, 53, 53, 53, 53, 53, 53, 55, 58, 60, 63, 67, 72, 77, 82, 87, 93,
+        99, 106, 112, 118, 125, 133, 139, 145, 152, 160, 160, 160, 160, 48, 50,
+        51, 53, 55, 55, 54, 53, 53, 55, 57, 59, 62, 66, 70, 74, 79, 84, 89, 95,
+        102, 107, 113, 120, 128, 134, 140, 147, 155, 155, 155, 155, 49, 50, 52,
+        53, 55, 55, 56, 56, 57, 59, 61, 64, 67, 70, 74, 79, 84, 89, 94, 100,
+        106, 112, 117, 124, 131, 137, 144, 150, 158, 158, 158, 158, 50, 51, 52,
+        53, 54, 56, 58, 60, 62, 64, 67, 69, 72, 76, 80, 84, 89, 94, 99, 104,
+        111, 116, 122, 128, 135, 141, 147, 154, 161, 161, 161, 161, 51, 52, 52,
+        53, 53, 56, 60, 63, 67, 70, 73, 76, 79, 82, 86, 90, 95, 99, 104, 110,
+        116, 121, 126, 133, 139, 145, 151, 157, 164, 164, 164, 164, 52, 52, 52,
+        53, 53, 57, 62, 67, 74, 77, 80, 83, 87, 90, 93, 97, 101, 106, 110, 115,
+        121, 126, 131, 137, 144, 149, 155, 161, 168, 168, 168, 168, 55, 55, 55,
+        55, 55, 59, 64, 70, 77, 80, 84, 87, 92, 95, 99, 103, 107, 111, 116, 121,
+        126, 131, 137, 142, 149, 154, 160, 166, 172, 172, 172, 172, 59, 59, 58,
+        58, 57, 61, 67, 73, 80, 84, 88, 92, 97, 101, 105, 109, 113, 118, 122,
+        127, 133, 137, 143, 148, 154, 159, 165, 170, 177, 177, 177, 177, 64, 62,
+        61, 60, 59, 64, 69, 76, 83, 87, 92, 97, 103, 107, 111, 116, 121, 125,
+        129, 134, 139, 144, 149, 154, 160, 165, 170, 176, 181, 181, 181, 181,
+        69, 67, 65, 63, 62, 67, 72, 79, 87, 92, 97, 103, 110, 114, 119, 124,
+        129, 133, 137, 142, 147, 151, 156, 161, 166, 171, 176, 181, 186, 186,
+        186, 186, 73, 71, 69, 67, 66, 70, 76, 82, 90, 95, 101, 107, 114, 119,
+        123, 129, 134, 138, 143, 147, 152, 157, 161, 166, 172, 176, 181, 186,
+        191, 191, 191, 191, 79, 76, 74, 72, 70, 74, 80, 86, 93, 99, 105, 111,
+        119, 123, 128, 134, 140, 144, 149, 153, 158, 163, 167, 172, 177, 182,
+        186, 191, 196, 196, 196, 196, 85, 82, 79, 77, 74, 79, 84, 90, 97, 103,
+        109, 116, 124, 129, 134, 140, 146, 150, 155, 160, 165, 169, 174, 178,
+        183, 188, 192, 197, 202, 202, 202, 202, 92, 89, 85, 82, 79, 84, 89, 95,
+        101, 107, 113, 121, 129, 134, 140, 146, 153, 157, 162, 167, 172, 176,
+        181, 185, 190, 194, 198, 203, 207, 207, 207, 207, 98, 94, 90, 87, 84,
+        89, 94, 99, 106, 111, 118, 125, 133, 138, 144, 150, 157, 162, 167, 172,
+        177, 181, 186, 190, 195, 199, 203, 207, 212, 212, 212, 212, 104, 100,
+        96, 93, 89, 94, 99, 104, 110, 116, 122, 129, 137, 143, 149, 155, 162,
+        167, 171, 177, 182, 186, 191, 195, 200, 204, 208, 212, 216, 216, 216,
+        216, 112, 107, 103, 99, 95, 100, 104, 110, 115, 121, 127, 134, 142, 147,
+        153, 160, 167, 172, 177, 182, 188, 192, 196, 201, 205, 209, 213, 217,
+        221, 221, 221, 221, 120, 115, 110, 106, 102, 106, 111, 116, 121, 126,
+        133, 139, 147, 152, 158, 165, 172, 177, 182, 188, 194, 198, 202, 206,
+        211, 215, 218, 222, 226, 226, 226, 226, 126, 121, 116, 112, 107, 112,
+        116, 121, 126, 131, 137, 144, 151, 157, 163, 169, 176, 181, 186, 192,
+        198, 202, 206, 211, 215, 219, 222, 226, 230, 230, 230, 230, 133, 128,
+        123, 118, 113, 117, 122, 126, 131, 137, 143, 149, 156, 161, 167, 174,
+        181, 186, 191, 196, 202, 206, 210, 215, 219, 223, 227, 230, 234, 234,
+        234, 234, 141, 135, 130, 125, 120, 124, 128, 133, 137, 142, 148, 154,
+        161, 166, 172, 178, 185, 190, 195, 201, 206, 211, 215, 219, 224, 227,
+        231, 234, 238, 238, 238, 238, 150, 144, 138, 133, 128, 131, 135, 139,
+        144, 149, 154, 160, 166, 172, 177, 183, 190, 195, 200, 205, 211, 215,
+        219, 224, 228, 232, 235, 239, 242, 242, 242, 242, 156, 150, 144, 139,
+        134, 137, 141, 145, 149, 154, 159, 165, 171, 176, 182, 188, 194, 199,
+        204, 209, 215, 219, 223, 227, 232, 235, 238, 242, 246, 246, 246, 246,
+        163, 156, 151, 145, 140, 144, 147, 151, 155, 160, 165, 170, 176, 181,
+        186, 192, 198, 203, 208, 213, 218, 222, 227, 231, 235, 238, 242, 245,
+        249, 249, 249, 249, 170, 164, 158, 152, 147, 150, 154, 157, 161, 166,
+        170, 176, 181, 186, 191, 197, 203, 207, 212, 217, 222, 226, 230, 234,
+        239, 242, 245, 249, 252, 252, 252, 252, 179, 172, 166, 160, 155, 158,
+        161, 164, 168, 172, 177, 181, 186, 191, 196, 202, 207, 212, 216, 221,
+        226, 230, 234, 238, 242, 246, 249, 252, 255, 255, 255, 255, 179, 172,
+        166, 160, 155, 158, 161, 164, 168, 172, 177, 181, 186, 191, 196, 202,
+        207, 212, 216, 221, 226, 230, 234, 238, 242, 246, 249, 252, 255, 255,
+        255, 255, 179, 172, 166, 160, 155, 158, 161, 164, 168, 172, 177, 181,
+        186, 191, 196, 202, 207, 212, 216, 221, 226, 230, 234, 238, 242, 246,
+        249, 252, 255, 255, 255, 255, 179, 172, 166, 160, 155, 158, 161, 164,
+        168, 172, 177, 181, 186, 191, 196, 202, 207, 212, 216, 221, 226, 230,
+        234, 238, 242, 246, 249, 252, 255, 255, 255, 255 },
+      { /* Intra matrices */
+        /* Size 4 */
+        21, 23, 40, 65, 23, 36, 52, 72, 40, 52, 75, 93, 65, 72, 93, 111,
+        /* Size 8 */
+        26, 19, 21, 28, 38, 50, 63, 77, 19, 22, 21, 25, 32, 42, 53, 66, 21, 21,
+        30, 35, 42, 50, 61, 72, 28, 25, 35, 46, 54, 62, 71, 81, 38, 32, 42, 54,
+        65, 74, 83, 92, 50, 42, 50, 62, 74, 85, 94, 101, 63, 53, 61, 71, 83, 94,
+        103, 110, 77, 66, 72, 81, 92, 101, 110, 117,
+        /* Size 16 */
+        25, 21, 18, 19, 20, 23, 26, 31, 36, 41, 48, 54, 61, 67, 74, 74, 21, 20,
+        20, 20, 20, 22, 25, 29, 33, 38, 44, 49, 56, 61, 68, 68, 18, 20, 21, 21,
+        20, 22, 24, 27, 31, 35, 40, 45, 51, 57, 63, 63, 19, 20, 21, 22, 24, 26,
+        28, 31, 35, 39, 44, 49, 55, 60, 66, 66, 20, 20, 20, 24, 29, 31, 34, 37,
+        40, 44, 48, 53, 58, 63, 69, 69, 23, 22, 22, 26, 31, 34, 38, 41, 45, 49,
+        53, 58, 63, 68, 73, 73, 26, 25, 24, 28, 34, 38, 44, 47, 52, 55, 60, 64,
+        68, 73, 78, 78, 31, 29, 27, 31, 37, 41, 47, 52, 57, 60, 65, 69, 74, 78,
+        83, 83, 36, 33, 31, 35, 40, 45, 52, 57, 62, 67, 71, 75, 80, 84, 88, 88,
+        41, 38, 35, 39, 44, 49, 55, 60, 67, 71, 76, 80, 84, 88, 92, 92, 48, 44,
+        40, 44, 48, 53, 60, 65, 71, 76, 81, 85, 90, 94, 97, 97, 54, 49, 45, 49,
+        53, 58, 64, 69, 75, 80, 85, 90, 94, 98, 101, 101, 61, 56, 51, 55, 58,
+        63, 68, 74, 80, 84, 90, 94, 99, 102, 106, 106, 67, 61, 57, 60, 63, 68,
+        73, 78, 84, 88, 94, 98, 102, 105, 109, 109, 74, 68, 63, 66, 69, 73, 78,
+        83, 88, 92, 97, 101, 106, 109, 112, 112, 74, 68, 63, 66, 69, 73, 78, 83,
+        88, 92, 97, 101, 106, 109, 112, 112,
+        /* Size 32 */
+        24, 22, 21, 19, 18, 18, 19, 19, 19, 21, 22, 24, 26, 28, 30, 32, 35, 38,
+        40, 43, 47, 50, 53, 56, 60, 63, 66, 69, 73, 73, 73, 73, 22, 21, 20, 19,
+        19, 19, 19, 19, 19, 21, 22, 23, 25, 27, 29, 31, 34, 36, 39, 41, 45, 47,
+        50, 53, 57, 60, 63, 66, 70, 70, 70, 70, 21, 20, 20, 19, 19, 19, 19, 19,
+        20, 21, 22, 23, 25, 26, 28, 30, 33, 35, 37, 40, 43, 45, 48, 51, 55, 57,
+        60, 63, 67, 67, 67, 67, 19, 19, 19, 20, 20, 20, 20, 20, 20, 21, 22, 23,
+        24, 25, 27, 29, 31, 33, 36, 38, 41, 43, 46, 49, 52, 55, 58, 61, 64, 64,
+        64, 64, 18, 19, 19, 20, 21, 20, 20, 20, 20, 21, 21, 22, 23, 25, 26, 28,
+        30, 32, 34, 37, 39, 42, 44, 47, 50, 53, 56, 59, 62, 62, 62, 62, 18, 19,
+        19, 20, 20, 21, 21, 21, 21, 22, 23, 24, 25, 27, 28, 30, 32, 34, 36, 38,
+        41, 43, 46, 49, 52, 54, 57, 60, 63, 63, 63, 63, 19, 19, 19, 20, 20, 21,
+        22, 22, 23, 24, 25, 26, 27, 29, 30, 32, 34, 36, 38, 40, 43, 45, 48, 50,
+        53, 56, 59, 62, 65, 65, 65, 65, 19, 19, 19, 20, 20, 21, 22, 24, 25, 26,
+        27, 29, 30, 31, 33, 35, 36, 38, 40, 43, 45, 47, 50, 52, 55, 58, 60, 63,
+        66, 66, 66, 66, 19, 19, 20, 20, 20, 21, 23, 25, 28, 29, 30, 32, 33, 34,
+        36, 37, 39, 41, 43, 45, 47, 49, 52, 54, 57, 59, 62, 65, 68, 68, 68, 68,
+        21, 21, 21, 21, 21, 22, 24, 26, 29, 30, 32, 33, 35, 37, 38, 40, 42, 43,
+        45, 47, 50, 52, 54, 57, 59, 62, 64, 67, 70, 70, 70, 70, 22, 22, 22, 22,
+        21, 23, 25, 27, 30, 32, 34, 35, 37, 39, 40, 42, 44, 46, 48, 50, 52, 54,
+        57, 59, 62, 64, 66, 69, 72, 72, 72, 72, 24, 23, 23, 23, 22, 24, 26, 29,
+        32, 33, 35, 38, 40, 42, 43, 45, 47, 49, 51, 53, 55, 57, 59, 62, 64, 66,
+        69, 71, 74, 74, 74, 74, 26, 25, 25, 24, 23, 25, 27, 30, 33, 35, 37, 40,
+        43, 45, 46, 48, 51, 52, 54, 56, 58, 60, 62, 65, 67, 69, 71, 74, 76, 76,
+        76, 76, 28, 27, 26, 25, 25, 27, 29, 31, 34, 37, 39, 42, 45, 46, 48, 51,
+        53, 55, 57, 59, 61, 63, 65, 67, 70, 72, 74, 76, 79, 79, 79, 79, 30, 29,
+        28, 27, 26, 28, 30, 33, 36, 38, 40, 43, 46, 48, 51, 53, 55, 57, 59, 61,
+        64, 66, 68, 70, 72, 74, 76, 79, 81, 81, 81, 81, 32, 31, 30, 29, 28, 30,
+        32, 35, 37, 40, 42, 45, 48, 51, 53, 55, 58, 60, 62, 64, 67, 69, 71, 73,
+        75, 77, 79, 81, 83, 83, 83, 83, 35, 34, 33, 31, 30, 32, 34, 36, 39, 42,
+        44, 47, 51, 53, 55, 58, 61, 63, 65, 67, 70, 72, 74, 76, 78, 80, 82, 84,
+        86, 86, 86, 86, 38, 36, 35, 33, 32, 34, 36, 38, 41, 43, 46, 49, 52, 55,
+        57, 60, 63, 65, 67, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 88, 88, 88,
+        40, 39, 37, 36, 34, 36, 38, 40, 43, 45, 48, 51, 54, 57, 59, 62, 65, 67,
+        70, 72, 74, 76, 78, 81, 83, 85, 86, 88, 91, 91, 91, 91, 43, 41, 40, 38,
+        37, 38, 40, 43, 45, 47, 50, 53, 56, 59, 61, 64, 67, 70, 72, 74, 77, 79,
+        81, 83, 85, 87, 89, 91, 93, 93, 93, 93, 47, 45, 43, 41, 39, 41, 43, 45,
+        47, 50, 52, 55, 58, 61, 64, 67, 70, 72, 74, 77, 80, 82, 84, 86, 88, 90,
+        92, 94, 95, 95, 95, 95, 50, 47, 45, 43, 42, 43, 45, 47, 49, 52, 54, 57,
+        60, 63, 66, 69, 72, 74, 76, 79, 82, 84, 86, 88, 90, 92, 94, 95, 97, 97,
+        97, 97, 53, 50, 48, 46, 44, 46, 48, 50, 52, 54, 57, 59, 62, 65, 68, 71,
+        74, 76, 78, 81, 84, 86, 88, 90, 92, 94, 96, 97, 99, 99, 99, 99, 56, 53,
+        51, 49, 47, 49, 50, 52, 54, 57, 59, 62, 65, 67, 70, 73, 76, 78, 81, 83,
+        86, 88, 90, 92, 94, 96, 98, 100, 101, 101, 101, 101, 60, 57, 55, 52, 50,
+        52, 53, 55, 57, 59, 62, 64, 67, 70, 72, 75, 78, 80, 83, 85, 88, 90, 92,
+        94, 97, 98, 100, 102, 104, 104, 104, 104, 63, 60, 57, 55, 53, 54, 56,
+        58, 59, 62, 64, 66, 69, 72, 74, 77, 80, 82, 85, 87, 90, 92, 94, 96, 98,
+        100, 102, 103, 105, 105, 105, 105, 66, 63, 60, 58, 56, 57, 59, 60, 62,
+        64, 66, 69, 71, 74, 76, 79, 82, 84, 86, 89, 92, 94, 96, 98, 100, 102,
+        103, 105, 107, 107, 107, 107, 69, 66, 63, 61, 59, 60, 62, 63, 65, 67,
+        69, 71, 74, 76, 79, 81, 84, 86, 88, 91, 94, 95, 97, 100, 102, 103, 105,
+        107, 108, 108, 108, 108, 73, 70, 67, 64, 62, 63, 65, 66, 68, 70, 72, 74,
+        76, 79, 81, 83, 86, 88, 91, 93, 95, 97, 99, 101, 104, 105, 107, 108,
+        110, 110, 110, 110, 73, 70, 67, 64, 62, 63, 65, 66, 68, 70, 72, 74, 76,
+        79, 81, 83, 86, 88, 91, 93, 95, 97, 99, 101, 104, 105, 107, 108, 110,
+        110, 110, 110, 73, 70, 67, 64, 62, 63, 65, 66, 68, 70, 72, 74, 76, 79,
+        81, 83, 86, 88, 91, 93, 95, 97, 99, 101, 104, 105, 107, 108, 110, 110,
+        110, 110, 73, 70, 67, 64, 62, 63, 65, 66, 68, 70, 72, 74, 76, 79, 81,
+        83, 86, 88, 91, 93, 95, 97, 99, 101, 104, 105, 107, 108, 110, 110, 110,
+        110 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 101, 110, 139, 101, 122, 130, 149, 110, 130, 167, 190, 139, 149,
+        190, 225,
+        /* Size 8 */
+        64, 52, 94, 99, 106, 119, 134, 152, 52, 75, 96, 87, 89, 98, 110, 125,
+        94, 96, 112, 108, 108, 113, 123, 136, 99, 87, 108, 122, 128, 134, 142,
+        153, 106, 89, 108, 128, 143, 153, 162, 172, 119, 98, 113, 134, 153, 168,
+        180, 191, 134, 110, 123, 142, 162, 180, 195, 207, 152, 125, 136, 153,
+        172, 191, 207, 221,
+        /* Size 16 */
+        64, 57, 52, 67, 94, 97, 99, 103, 106, 112, 119, 126, 134, 142, 152, 152,
+        57, 59, 61, 74, 95, 94, 93, 95, 97, 102, 107, 114, 121, 129, 137, 137,
+        52, 61, 75, 84, 96, 91, 87, 88, 89, 93, 98, 104, 110, 117, 125, 125, 67,
+        74, 84, 92, 103, 99, 96, 97, 97, 101, 105, 110, 116, 123, 130, 130, 94,
+        95, 96, 103, 112, 110, 108, 108, 108, 110, 113, 118, 123, 129, 136, 136,
+        97, 94, 91, 99, 110, 112, 115, 116, 117, 120, 123, 127, 132, 138, 144,
+        144, 99, 93, 87, 96, 108, 115, 122, 125, 128, 131, 134, 138, 142, 147,
+        153, 153, 103, 95, 88, 97, 108, 116, 125, 130, 135, 139, 143, 147, 151,
+        156, 162, 162, 106, 97, 89, 97, 108, 117, 128, 135, 143, 148, 153, 157,
+        162, 167, 172, 172, 112, 102, 93, 101, 110, 120, 131, 139, 148, 154,
+        160, 165, 171, 176, 181, 181, 119, 107, 98, 105, 113, 123, 134, 143,
+        153, 160, 168, 174, 180, 185, 191, 191, 126, 114, 104, 110, 118, 127,
+        138, 147, 157, 165, 174, 180, 187, 193, 199, 199, 134, 121, 110, 116,
+        123, 132, 142, 151, 162, 171, 180, 187, 195, 201, 207, 207, 142, 129,
+        117, 123, 129, 138, 147, 156, 167, 176, 185, 193, 201, 207, 214, 214,
+        152, 137, 125, 130, 136, 144, 153, 162, 172, 181, 191, 199, 207, 214,
+        221, 221, 152, 137, 125, 130, 136, 144, 153, 162, 172, 181, 191, 199,
+        207, 214, 221, 221,
+        /* Size 32 */
+        64, 60, 57, 54, 52, 58, 67, 78, 94, 95, 97, 98, 99, 101, 103, 104, 106,
+        109, 112, 115, 119, 122, 126, 130, 134, 138, 142, 147, 152, 152, 152,
+        152, 60, 59, 58, 57, 56, 62, 70, 80, 94, 95, 95, 95, 96, 97, 99, 100,
+        101, 104, 107, 110, 113, 116, 120, 123, 127, 131, 135, 139, 144, 144,
+        144, 144, 57, 58, 59, 60, 61, 67, 74, 83, 95, 94, 94, 93, 93, 94, 95,
+        96, 97, 99, 102, 104, 107, 110, 114, 117, 121, 125, 129, 133, 137, 137,
+        137, 137, 54, 57, 60, 63, 67, 72, 79, 86, 95, 94, 92, 91, 89, 90, 91,
+        92, 93, 95, 97, 100, 102, 105, 109, 112, 115, 119, 123, 127, 131, 131,
+        131, 131, 52, 56, 61, 67, 75, 79, 84, 89, 96, 93, 91, 89, 87, 87, 88,
+        88, 89, 91, 93, 96, 98, 101, 104, 107, 110, 114, 117, 121, 125, 125,
+        125, 125, 58, 62, 67, 72, 79, 83, 88, 93, 99, 97, 95, 93, 91, 92, 92,
+        93, 93, 95, 97, 99, 101, 104, 107, 110, 113, 117, 120, 124, 128, 128,
+        128, 128, 67, 70, 74, 79, 84, 88, 92, 97, 103, 101, 99, 98, 96, 96, 97,
+        97, 97, 99, 101, 103, 105, 108, 110, 113, 116, 120, 123, 127, 130, 130,
+        130, 130, 78, 80, 83, 86, 89, 93, 97, 102, 107, 106, 104, 103, 102, 102,
+        102, 102, 102, 104, 106, 107, 109, 112, 114, 117, 120, 123, 126, 130,
+        133, 133, 133, 133, 94, 94, 95, 95, 96, 99, 103, 107, 112, 111, 110,
+        109, 108, 108, 108, 108, 108, 109, 110, 112, 113, 116, 118, 121, 123,
+        126, 129, 133, 136, 136, 136, 136, 95, 95, 94, 94, 93, 97, 101, 106,
+        111, 111, 111, 111, 111, 111, 112, 112, 112, 113, 115, 116, 118, 120,
+        122, 125, 127, 130, 133, 137, 140, 140, 140, 140, 97, 95, 94, 92, 91,
+        95, 99, 104, 110, 111, 112, 113, 115, 115, 116, 116, 117, 118, 120, 121,
+        123, 125, 127, 129, 132, 135, 138, 141, 144, 144, 144, 144, 98, 95, 93,
+        91, 89, 93, 98, 103, 109, 111, 113, 116, 118, 119, 120, 121, 122, 124,
+        125, 127, 128, 130, 132, 134, 137, 139, 142, 145, 148, 148, 148, 148,
+        99, 96, 93, 89, 87, 91, 96, 102, 108, 111, 115, 118, 122, 124, 125, 127,
+        128, 130, 131, 132, 134, 136, 138, 140, 142, 144, 147, 150, 153, 153,
+        153, 153, 101, 97, 94, 90, 87, 92, 96, 102, 108, 111, 115, 119, 124,
+        125, 127, 129, 132, 133, 135, 136, 138, 140, 142, 144, 146, 149, 152,
+        154, 157, 157, 157, 157, 103, 99, 95, 91, 88, 92, 97, 102, 108, 112,
+        116, 120, 125, 127, 130, 133, 135, 137, 139, 141, 143, 145, 147, 149,
+        151, 154, 156, 159, 162, 162, 162, 162, 104, 100, 96, 92, 88, 93, 97,
+        102, 108, 112, 116, 121, 127, 129, 133, 136, 139, 141, 143, 145, 148,
+        150, 152, 154, 156, 159, 161, 164, 167, 167, 167, 167, 106, 101, 97, 93,
+        89, 93, 97, 102, 108, 112, 117, 122, 128, 132, 135, 139, 143, 145, 148,
+        150, 153, 155, 157, 160, 162, 164, 167, 169, 172, 172, 172, 172, 109,
+        104, 99, 95, 91, 95, 99, 104, 109, 113, 118, 124, 130, 133, 137, 141,
+        145, 148, 151, 154, 157, 159, 161, 164, 166, 169, 171, 174, 176, 176,
+        176, 176, 112, 107, 102, 97, 93, 97, 101, 106, 110, 115, 120, 125, 131,
+        135, 139, 143, 148, 151, 154, 157, 160, 163, 165, 168, 171, 173, 176,
+        178, 181, 181, 181, 181, 115, 110, 104, 100, 96, 99, 103, 107, 112, 116,
+        121, 127, 132, 136, 141, 145, 150, 154, 157, 161, 164, 167, 170, 172,
+        175, 178, 180, 183, 186, 186, 186, 186, 119, 113, 107, 102, 98, 101,
+        105, 109, 113, 118, 123, 128, 134, 138, 143, 148, 153, 157, 160, 164,
+        168, 171, 174, 177, 180, 183, 185, 188, 191, 191, 191, 191, 122, 116,
+        110, 105, 101, 104, 108, 112, 116, 120, 125, 130, 136, 140, 145, 150,
+        155, 159, 163, 167, 171, 174, 177, 180, 184, 186, 189, 192, 195, 195,
+        195, 195, 126, 120, 114, 109, 104, 107, 110, 114, 118, 122, 127, 132,
+        138, 142, 147, 152, 157, 161, 165, 170, 174, 177, 180, 184, 187, 190,
+        193, 196, 199, 199, 199, 199, 130, 123, 117, 112, 107, 110, 113, 117,
+        121, 125, 129, 134, 140, 144, 149, 154, 160, 164, 168, 172, 177, 180,
+        184, 187, 191, 194, 197, 200, 203, 203, 203, 203, 134, 127, 121, 115,
+        110, 113, 116, 120, 123, 127, 132, 137, 142, 146, 151, 156, 162, 166,
+        171, 175, 180, 184, 187, 191, 195, 198, 201, 204, 207, 207, 207, 207,
+        138, 131, 125, 119, 114, 117, 120, 123, 126, 130, 135, 139, 144, 149,
+        154, 159, 164, 169, 173, 178, 183, 186, 190, 194, 198, 201, 204, 207,
+        211, 211, 211, 211, 142, 135, 129, 123, 117, 120, 123, 126, 129, 133,
+        138, 142, 147, 152, 156, 161, 167, 171, 176, 180, 185, 189, 193, 197,
+        201, 204, 207, 211, 214, 214, 214, 214, 147, 139, 133, 127, 121, 124,
+        127, 130, 133, 137, 141, 145, 150, 154, 159, 164, 169, 174, 178, 183,
+        188, 192, 196, 200, 204, 207, 211, 214, 217, 217, 217, 217, 152, 144,
+        137, 131, 125, 128, 130, 133, 136, 140, 144, 148, 153, 157, 162, 167,
+        172, 176, 181, 186, 191, 195, 199, 203, 207, 211, 214, 217, 221, 221,
+        221, 221, 152, 144, 137, 131, 125, 128, 130, 133, 136, 140, 144, 148,
+        153, 157, 162, 167, 172, 176, 181, 186, 191, 195, 199, 203, 207, 211,
+        214, 217, 221, 221, 221, 221, 152, 144, 137, 131, 125, 128, 130, 133,
+        136, 140, 144, 148, 153, 157, 162, 167, 172, 176, 181, 186, 191, 195,
+        199, 203, 207, 211, 214, 217, 221, 221, 221, 221, 152, 144, 137, 131,
+        125, 128, 130, 133, 136, 140, 144, 148, 153, 157, 162, 167, 172, 176,
+        181, 186, 191, 195, 199, 203, 207, 211, 214, 217, 221, 221, 221, 221 },
+      { /* Intra matrices */
+        /* Size 4 */
+        27, 44, 48, 61, 44, 53, 57, 66, 48, 57, 74, 86, 61, 66, 86, 103,
+        /* Size 8 */
+        29, 23, 43, 46, 49, 56, 63, 72, 23, 34, 44, 40, 41, 45, 51, 59, 43, 44,
+        52, 50, 50, 53, 58, 64, 46, 40, 50, 57, 60, 63, 67, 73, 49, 41, 50, 60,
+        68, 73, 78, 83, 56, 45, 53, 63, 73, 81, 88, 93, 63, 51, 58, 67, 78, 88,
+        96, 102, 72, 59, 64, 73, 83, 93, 102, 110,
+        /* Size 16 */
+        28, 25, 23, 29, 42, 43, 45, 46, 48, 51, 54, 58, 62, 66, 70, 70, 25, 26,
+        27, 33, 42, 42, 42, 43, 44, 46, 49, 52, 55, 59, 63, 63, 23, 27, 33, 37,
+        43, 41, 39, 39, 40, 42, 44, 47, 50, 53, 57, 57, 29, 33, 37, 41, 46, 45,
+        43, 44, 44, 46, 47, 50, 53, 56, 60, 60, 42, 42, 43, 46, 51, 50, 49, 49,
+        49, 50, 51, 54, 56, 59, 63, 63, 43, 42, 41, 45, 50, 51, 52, 53, 53, 55,
+        56, 58, 61, 63, 67, 67, 45, 42, 39, 43, 49, 52, 56, 57, 59, 60, 61, 63,
+        65, 68, 71, 71, 46, 43, 39, 44, 49, 53, 57, 60, 62, 64, 66, 68, 70, 73,
+        75, 75, 48, 44, 40, 44, 49, 53, 59, 62, 66, 69, 71, 73, 76, 78, 81, 81,
+        51, 46, 42, 46, 50, 55, 60, 64, 69, 72, 75, 77, 80, 83, 85, 85, 54, 49,
+        44, 47, 51, 56, 61, 66, 71, 75, 79, 82, 85, 88, 91, 91, 58, 52, 47, 50,
+        54, 58, 63, 68, 73, 77, 82, 85, 89, 92, 95, 95, 62, 55, 50, 53, 56, 61,
+        65, 70, 76, 80, 85, 89, 93, 96, 99, 99, 66, 59, 53, 56, 59, 63, 68, 73,
+        78, 83, 88, 92, 96, 99, 103, 103, 70, 63, 57, 60, 63, 67, 71, 75, 81,
+        85, 91, 95, 99, 103, 107, 107, 70, 63, 57, 60, 63, 67, 71, 75, 81, 85,
+        91, 95, 99, 103, 107, 107,
+        /* Size 32 */
+        28, 26, 25, 23, 22, 25, 29, 34, 41, 42, 43, 43, 44, 45, 46, 46, 47, 49,
+        50, 52, 53, 55, 57, 59, 61, 63, 65, 67, 69, 69, 69, 69, 26, 26, 25, 25,
+        24, 27, 31, 35, 42, 42, 42, 42, 42, 43, 44, 44, 45, 46, 48, 49, 50, 52,
+        54, 55, 57, 59, 61, 63, 66, 66, 66, 66, 25, 25, 26, 26, 26, 29, 32, 37,
+        42, 42, 41, 41, 41, 41, 42, 42, 43, 44, 45, 46, 48, 49, 51, 53, 54, 56,
+        58, 60, 62, 62, 62, 62, 23, 25, 26, 28, 29, 32, 34, 38, 42, 41, 41, 40,
+        39, 40, 40, 41, 41, 42, 43, 44, 45, 47, 48, 50, 52, 53, 55, 57, 59, 59,
+        59, 59, 22, 24, 26, 29, 33, 35, 37, 39, 42, 41, 40, 39, 38, 38, 39, 39,
+        39, 40, 41, 42, 43, 45, 46, 48, 49, 51, 53, 54, 56, 56, 56, 56, 25, 27,
+        29, 32, 35, 37, 39, 41, 44, 43, 42, 41, 40, 40, 41, 41, 41, 42, 43, 44,
+        45, 46, 48, 49, 51, 52, 54, 56, 58, 58, 58, 58, 29, 31, 32, 34, 37, 39,
+        41, 43, 46, 45, 44, 43, 43, 43, 43, 43, 43, 44, 45, 46, 47, 48, 49, 51,
+        52, 54, 55, 57, 59, 59, 59, 59, 34, 35, 37, 38, 39, 41, 43, 45, 48, 47,
+        46, 46, 45, 45, 45, 45, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 57, 58,
+        60, 60, 60, 60, 41, 42, 42, 42, 42, 44, 46, 48, 50, 49, 49, 49, 48, 48,
+        48, 48, 48, 49, 49, 50, 51, 52, 53, 54, 55, 57, 58, 60, 62, 62, 62, 62,
+        42, 42, 42, 41, 41, 43, 45, 47, 49, 50, 50, 50, 50, 50, 50, 50, 50, 51,
+        51, 52, 53, 54, 55, 56, 57, 59, 60, 62, 64, 64, 64, 64, 43, 42, 41, 41,
+        40, 42, 44, 46, 49, 50, 50, 51, 51, 52, 52, 52, 52, 53, 54, 54, 55, 56,
+        57, 58, 60, 61, 62, 64, 65, 65, 65, 65, 43, 42, 41, 40, 39, 41, 43, 46,
+        49, 50, 51, 52, 53, 54, 54, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63,
+        65, 66, 68, 68, 68, 68, 44, 42, 41, 39, 38, 40, 43, 45, 48, 50, 51, 53,
+        55, 56, 56, 57, 58, 58, 59, 60, 61, 61, 62, 63, 64, 66, 67, 68, 70, 70,
+        70, 70, 45, 43, 41, 40, 38, 40, 43, 45, 48, 50, 52, 54, 56, 57, 57, 58,
+        59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 72, 72, 72, 46, 44,
+        42, 40, 39, 41, 43, 45, 48, 50, 52, 54, 56, 57, 59, 60, 61, 62, 63, 64,
+        65, 66, 67, 68, 69, 70, 72, 73, 74, 74, 74, 74, 46, 44, 42, 41, 39, 41,
+        43, 45, 48, 50, 52, 54, 57, 58, 60, 61, 63, 64, 65, 66, 67, 68, 70, 71,
+        72, 73, 74, 75, 77, 77, 77, 77, 47, 45, 43, 41, 39, 41, 43, 45, 48, 50,
+        52, 55, 58, 59, 61, 63, 65, 66, 67, 69, 70, 71, 72, 73, 75, 76, 77, 78,
+        79, 79, 79, 79, 49, 46, 44, 42, 40, 42, 44, 46, 49, 51, 53, 56, 58, 60,
+        62, 64, 66, 68, 69, 70, 72, 73, 74, 75, 77, 78, 79, 80, 82, 82, 82, 82,
+        50, 48, 45, 43, 41, 43, 45, 47, 49, 51, 54, 56, 59, 61, 63, 65, 67, 69,
+        70, 72, 74, 75, 76, 78, 79, 80, 81, 83, 84, 84, 84, 84, 52, 49, 46, 44,
+        42, 44, 46, 48, 50, 52, 54, 57, 60, 62, 64, 66, 69, 70, 72, 74, 76, 77,
+        78, 80, 81, 83, 84, 85, 87, 87, 87, 87, 53, 50, 48, 45, 43, 45, 47, 49,
+        51, 53, 55, 58, 61, 63, 65, 67, 70, 72, 74, 76, 78, 79, 81, 82, 84, 85,
+        86, 88, 89, 89, 89, 89, 55, 52, 49, 47, 45, 46, 48, 50, 52, 54, 56, 59,
+        61, 64, 66, 68, 71, 73, 75, 77, 79, 81, 82, 84, 86, 87, 88, 90, 91, 91,
+        91, 91, 57, 54, 51, 48, 46, 48, 49, 51, 53, 55, 57, 60, 62, 65, 67, 70,
+        72, 74, 76, 78, 81, 82, 84, 86, 87, 89, 90, 92, 93, 93, 93, 93, 59, 55,
+        53, 50, 48, 49, 51, 52, 54, 56, 58, 61, 63, 66, 68, 71, 73, 75, 78, 80,
+        82, 84, 86, 87, 89, 91, 92, 94, 96, 96, 96, 96, 61, 57, 54, 52, 49, 51,
+        52, 54, 55, 57, 60, 62, 64, 67, 69, 72, 75, 77, 79, 81, 84, 86, 87, 89,
+        91, 93, 95, 96, 98, 98, 98, 98, 63, 59, 56, 53, 51, 52, 54, 55, 57, 59,
+        61, 63, 66, 68, 70, 73, 76, 78, 80, 83, 85, 87, 89, 91, 93, 95, 96, 98,
+        100, 100, 100, 100, 65, 61, 58, 55, 53, 54, 55, 57, 58, 60, 62, 65, 67,
+        69, 72, 74, 77, 79, 81, 84, 86, 88, 90, 92, 95, 96, 98, 100, 102, 102,
+        102, 102, 67, 63, 60, 57, 54, 56, 57, 58, 60, 62, 64, 66, 68, 71, 73,
+        75, 78, 80, 83, 85, 88, 90, 92, 94, 96, 98, 100, 102, 103, 103, 103,
+        103, 69, 66, 62, 59, 56, 58, 59, 60, 62, 64, 65, 68, 70, 72, 74, 77, 79,
+        82, 84, 87, 89, 91, 93, 96, 98, 100, 102, 103, 105, 105, 105, 105, 69,
+        66, 62, 59, 56, 58, 59, 60, 62, 64, 65, 68, 70, 72, 74, 77, 79, 82, 84,
+        87, 89, 91, 93, 96, 98, 100, 102, 103, 105, 105, 105, 105, 69, 66, 62,
+        59, 56, 58, 59, 60, 62, 64, 65, 68, 70, 72, 74, 77, 79, 82, 84, 87, 89,
+        91, 93, 96, 98, 100, 102, 103, 105, 105, 105, 105, 69, 66, 62, 59, 56,
+        58, 59, 60, 62, 64, 65, 68, 70, 72, 74, 77, 79, 82, 84, 87, 89, 91, 93,
+        96, 98, 100, 102, 103, 105, 105, 105, 105 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 70, 112, 168, 70, 103, 139, 181, 112, 139, 188, 221, 168, 181, 221,
+        251,
+        /* Size 8 */
+        64, 49, 53, 68, 90, 115, 139, 163, 49, 56, 53, 62, 78, 99, 121, 144, 53,
+        53, 74, 85, 98, 115, 135, 154, 68, 62, 85, 106, 122, 137, 153, 169, 90,
+        78, 98, 122, 142, 158, 172, 185, 115, 99, 115, 137, 158, 174, 188, 199,
+        139, 121, 135, 153, 172, 188, 200, 210, 163, 144, 154, 169, 185, 199,
+        210, 220,
+        /* Size 16 */
+        64, 55, 49, 51, 53, 59, 68, 78, 90, 101, 115, 126, 139, 150, 163, 163,
+        55, 54, 52, 52, 53, 58, 65, 73, 84, 94, 106, 117, 130, 140, 153, 153,
+        49, 52, 56, 54, 53, 57, 62, 69, 78, 87, 99, 109, 121, 132, 144, 144, 51,
+        52, 54, 58, 62, 66, 72, 79, 87, 96, 106, 116, 128, 137, 149, 149, 53,
+        53, 53, 62, 74, 79, 85, 91, 98, 106, 115, 124, 135, 144, 154, 154, 59,
+        58, 57, 66, 79, 86, 94, 101, 109, 117, 125, 134, 143, 152, 161, 161, 68,
+        65, 62, 72, 85, 94, 106, 114, 122, 129, 137, 145, 153, 160, 169, 169,
+        78, 73, 69, 79, 91, 101, 114, 122, 131, 139, 147, 154, 162, 169, 176,
+        176, 90, 84, 78, 87, 98, 109, 122, 131, 142, 149, 158, 164, 172, 178,
+        185, 185, 101, 94, 87, 96, 106, 117, 129, 139, 149, 157, 166, 172, 179,
+        185, 191, 191, 115, 106, 99, 106, 115, 125, 137, 147, 158, 166, 174,
+        181, 188, 193, 199, 199, 126, 117, 109, 116, 124, 134, 145, 154, 164,
+        172, 181, 187, 194, 199, 204, 204, 139, 130, 121, 128, 135, 143, 153,
+        162, 172, 179, 188, 194, 200, 205, 210, 210, 150, 140, 132, 137, 144,
+        152, 160, 169, 178, 185, 193, 199, 205, 210, 215, 215, 163, 153, 144,
+        149, 154, 161, 169, 176, 185, 191, 199, 204, 210, 215, 220, 220, 163,
+        153, 144, 149, 154, 161, 169, 176, 185, 191, 199, 204, 210, 215, 220,
+        220,
+        /* Size 32 */
+        64, 59, 55, 52, 49, 50, 51, 52, 53, 56, 59, 64, 68, 73, 78, 83, 90, 95,
+        101, 107, 115, 120, 126, 132, 139, 145, 150, 156, 163, 163, 163, 163,
+        59, 57, 54, 52, 50, 51, 51, 52, 53, 56, 59, 63, 67, 71, 75, 81, 87, 92,
+        97, 103, 110, 115, 121, 127, 134, 140, 145, 151, 158, 158, 158, 158, 55,
+        54, 54, 53, 52, 52, 52, 53, 53, 55, 58, 62, 65, 69, 73, 78, 84, 88, 94,
+        99, 106, 111, 117, 123, 130, 135, 140, 146, 153, 153, 153, 153, 52, 52,
+        53, 53, 54, 54, 53, 53, 53, 55, 58, 61, 63, 67, 71, 76, 81, 85, 90, 96,
+        102, 107, 113, 119, 125, 130, 136, 142, 148, 148, 148, 148, 49, 50, 52,
+        54, 56, 55, 54, 54, 53, 55, 57, 60, 62, 65, 69, 73, 78, 83, 87, 93, 99,
+        104, 109, 115, 121, 126, 132, 137, 144, 144, 144, 144, 50, 51, 52, 54,
+        55, 56, 56, 57, 57, 59, 62, 64, 67, 70, 74, 78, 82, 87, 91, 97, 102,
+        107, 112, 118, 124, 129, 134, 140, 146, 146, 146, 146, 51, 51, 52, 53,
+        54, 56, 58, 60, 62, 64, 66, 69, 72, 75, 79, 83, 87, 91, 96, 101, 106,
+        111, 116, 122, 128, 132, 137, 143, 149, 149, 149, 149, 52, 52, 53, 53,
+        54, 57, 60, 63, 67, 70, 72, 75, 78, 81, 84, 88, 92, 96, 101, 105, 111,
+        115, 120, 125, 131, 136, 140, 146, 151, 151, 151, 151, 53, 53, 53, 53,
+        53, 57, 62, 67, 74, 76, 79, 82, 85, 88, 91, 95, 98, 102, 106, 111, 115,
+        120, 124, 129, 135, 139, 144, 149, 154, 154, 154, 154, 56, 56, 55, 55,
+        55, 59, 64, 70, 76, 79, 82, 86, 89, 93, 96, 99, 103, 107, 111, 115, 120,
+        124, 129, 134, 139, 143, 148, 152, 158, 158, 158, 158, 59, 59, 58, 58,
+        57, 62, 66, 72, 79, 82, 86, 90, 94, 98, 101, 105, 109, 113, 117, 121,
+        125, 129, 134, 138, 143, 147, 152, 156, 161, 161, 161, 161, 64, 63, 62,
+        61, 60, 64, 69, 75, 82, 86, 90, 95, 100, 103, 107, 111, 115, 119, 122,
+        127, 131, 135, 139, 143, 148, 152, 156, 160, 165, 165, 165, 165, 68, 67,
+        65, 63, 62, 67, 72, 78, 85, 89, 94, 100, 106, 110, 114, 118, 122, 125,
+        129, 133, 137, 141, 145, 149, 153, 157, 160, 165, 169, 169, 169, 169,
+        73, 71, 69, 67, 65, 70, 75, 81, 88, 93, 98, 103, 110, 113, 118, 122,
+        126, 130, 134, 138, 142, 145, 149, 153, 157, 161, 164, 168, 173, 173,
+        173, 173, 78, 75, 73, 71, 69, 74, 79, 84, 91, 96, 101, 107, 114, 118,
+        122, 126, 131, 135, 139, 142, 147, 150, 154, 158, 162, 165, 169, 172,
+        176, 176, 176, 176, 83, 81, 78, 76, 73, 78, 83, 88, 95, 99, 105, 111,
+        118, 122, 126, 131, 136, 140, 144, 148, 152, 155, 159, 163, 166, 170,
+        173, 177, 180, 180, 180, 180, 90, 87, 84, 81, 78, 82, 87, 92, 98, 103,
+        109, 115, 122, 126, 131, 136, 142, 146, 149, 153, 158, 161, 164, 168,
+        172, 175, 178, 181, 185, 185, 185, 185, 95, 92, 88, 85, 83, 87, 91, 96,
+        102, 107, 113, 119, 125, 130, 135, 140, 146, 149, 153, 157, 162, 165,
+        168, 172, 175, 178, 181, 185, 188, 188, 188, 188, 101, 97, 94, 90, 87,
+        91, 96, 101, 106, 111, 117, 122, 129, 134, 139, 144, 149, 153, 157, 161,
+        166, 169, 172, 176, 179, 182, 185, 188, 191, 191, 191, 191, 107, 103,
+        99, 96, 93, 97, 101, 105, 111, 115, 121, 127, 133, 138, 142, 148, 153,
+        157, 161, 165, 170, 173, 176, 180, 183, 186, 189, 192, 195, 195, 195,
+        195, 115, 110, 106, 102, 99, 102, 106, 111, 115, 120, 125, 131, 137,
+        142, 147, 152, 158, 162, 166, 170, 174, 177, 181, 184, 188, 190, 193,
+        196, 199, 199, 199, 199, 120, 115, 111, 107, 104, 107, 111, 115, 120,
+        124, 129, 135, 141, 145, 150, 155, 161, 165, 169, 173, 177, 181, 184,
+        187, 191, 193, 196, 199, 202, 202, 202, 202, 126, 121, 117, 113, 109,
+        112, 116, 120, 124, 129, 134, 139, 145, 149, 154, 159, 164, 168, 172,
+        176, 181, 184, 187, 190, 194, 196, 199, 202, 204, 204, 204, 204, 132,
+        127, 123, 119, 115, 118, 122, 125, 129, 134, 138, 143, 149, 153, 158,
+        163, 168, 172, 176, 180, 184, 187, 190, 194, 197, 199, 202, 205, 207,
+        207, 207, 207, 139, 134, 130, 125, 121, 124, 128, 131, 135, 139, 143,
+        148, 153, 157, 162, 166, 172, 175, 179, 183, 188, 191, 194, 197, 200,
+        203, 205, 208, 210, 210, 210, 210, 145, 140, 135, 130, 126, 129, 132,
+        136, 139, 143, 147, 152, 157, 161, 165, 170, 175, 178, 182, 186, 190,
+        193, 196, 199, 203, 205, 208, 210, 213, 213, 213, 213, 150, 145, 140,
+        136, 132, 134, 137, 140, 144, 148, 152, 156, 160, 164, 169, 173, 178,
+        181, 185, 189, 193, 196, 199, 202, 205, 208, 210, 212, 215, 215, 215,
+        215, 156, 151, 146, 142, 137, 140, 143, 146, 149, 152, 156, 160, 165,
+        168, 172, 177, 181, 185, 188, 192, 196, 199, 202, 205, 208, 210, 212,
+        215, 217, 217, 217, 217, 163, 158, 153, 148, 144, 146, 149, 151, 154,
+        158, 161, 165, 169, 173, 176, 180, 185, 188, 191, 195, 199, 202, 204,
+        207, 210, 213, 215, 217, 220, 220, 220, 220, 163, 158, 153, 148, 144,
+        146, 149, 151, 154, 158, 161, 165, 169, 173, 176, 180, 185, 188, 191,
+        195, 199, 202, 204, 207, 210, 213, 215, 217, 220, 220, 220, 220, 163,
+        158, 153, 148, 144, 146, 149, 151, 154, 158, 161, 165, 169, 173, 176,
+        180, 185, 188, 191, 195, 199, 202, 204, 207, 210, 213, 215, 217, 220,
+        220, 220, 220, 163, 158, 153, 148, 144, 146, 149, 151, 154, 158, 161,
+        165, 169, 173, 176, 180, 185, 188, 191, 195, 199, 202, 204, 207, 210,
+        213, 215, 217, 220, 220, 220, 220 },
+      { /* Intra matrices */
+        /* Size 4 */
+        24, 26, 43, 67, 26, 39, 54, 72, 43, 54, 75, 91, 67, 72, 91, 105,
+        /* Size 8 */
+        28, 21, 23, 30, 41, 53, 65, 77, 21, 25, 23, 27, 35, 45, 56, 68, 23, 23,
+        33, 38, 45, 53, 63, 73, 30, 27, 38, 49, 56, 64, 72, 81, 41, 35, 45, 56,
+        67, 75, 82, 89, 53, 45, 53, 64, 75, 84, 91, 97, 65, 56, 63, 72, 82, 91,
+        98, 104, 77, 68, 73, 81, 89, 97, 104, 109,
+        /* Size 16 */
+        27, 24, 21, 21, 22, 25, 29, 34, 39, 44, 51, 56, 63, 68, 75, 75, 24, 23,
+        22, 22, 22, 25, 28, 32, 36, 41, 47, 52, 58, 63, 70, 70, 21, 22, 24, 23,
+        23, 24, 27, 30, 34, 38, 43, 48, 54, 59, 65, 65, 21, 22, 23, 25, 26, 28,
+        31, 34, 38, 42, 47, 52, 57, 62, 68, 68, 22, 22, 23, 26, 32, 34, 37, 40,
+        43, 47, 51, 55, 61, 65, 70, 70, 25, 25, 24, 28, 34, 37, 41, 44, 48, 52,
+        56, 60, 65, 69, 74, 74, 29, 28, 27, 31, 37, 41, 47, 50, 54, 58, 62, 65,
+        70, 74, 78, 78, 34, 32, 30, 34, 40, 44, 50, 54, 59, 62, 67, 70, 74, 78,
+        82, 82, 39, 36, 34, 38, 43, 48, 54, 59, 64, 68, 72, 76, 79, 83, 86, 86,
+        44, 41, 38, 42, 47, 52, 58, 62, 68, 72, 76, 80, 83, 86, 90, 90, 51, 47,
+        43, 47, 51, 56, 62, 67, 72, 76, 81, 84, 88, 91, 94, 94, 56, 52, 48, 52,
+        55, 60, 65, 70, 76, 80, 84, 87, 91, 94, 97, 97, 63, 58, 54, 57, 61, 65,
+        70, 74, 79, 83, 88, 91, 95, 97, 100, 100, 68, 63, 59, 62, 65, 69, 74,
+        78, 83, 86, 91, 94, 97, 100, 103, 103, 75, 70, 65, 68, 70, 74, 78, 82,
+        86, 90, 94, 97, 100, 103, 105, 105, 75, 70, 65, 68, 70, 74, 78, 82, 86,
+        90, 94, 97, 100, 103, 105, 105,
+        /* Size 32 */
+        27, 25, 23, 22, 20, 21, 21, 21, 22, 23, 25, 27, 29, 31, 33, 36, 39, 41,
+        43, 46, 50, 52, 55, 58, 62, 64, 67, 70, 73, 73, 73, 73, 25, 24, 23, 22,
+        21, 21, 21, 22, 22, 23, 25, 26, 28, 30, 32, 34, 37, 39, 42, 45, 48, 50,
+        53, 56, 59, 62, 65, 67, 71, 71, 71, 71, 23, 23, 22, 22, 22, 22, 22, 22,
+        22, 23, 24, 26, 27, 29, 31, 33, 36, 38, 40, 43, 46, 48, 51, 54, 57, 60,
+        62, 65, 68, 68, 68, 68, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 24, 25,
+        27, 28, 30, 32, 34, 36, 39, 41, 44, 46, 49, 52, 55, 57, 60, 63, 66, 66,
+        66, 66, 20, 21, 22, 22, 23, 23, 23, 22, 22, 23, 24, 25, 26, 28, 29, 31,
+        33, 35, 37, 40, 43, 45, 47, 50, 53, 55, 58, 61, 64, 64, 64, 64, 21, 21,
+        22, 22, 23, 23, 23, 24, 24, 25, 26, 27, 28, 30, 31, 33, 35, 37, 39, 42,
+        44, 46, 49, 52, 54, 57, 59, 62, 65, 65, 65, 65, 21, 21, 22, 22, 23, 23,
+        24, 25, 26, 27, 28, 29, 30, 32, 33, 35, 37, 39, 41, 43, 46, 48, 51, 53,
+        56, 58, 61, 63, 66, 66, 66, 66, 21, 22, 22, 22, 22, 24, 25, 27, 28, 29,
+        30, 32, 33, 34, 36, 38, 40, 41, 43, 46, 48, 50, 52, 55, 58, 60, 62, 65,
+        68, 68, 68, 68, 22, 22, 22, 22, 22, 24, 26, 28, 31, 32, 33, 35, 36, 38,
+        39, 41, 42, 44, 46, 48, 50, 52, 54, 57, 59, 62, 64, 66, 69, 69, 69, 69,
+        23, 23, 23, 23, 23, 25, 27, 29, 32, 34, 35, 37, 38, 40, 41, 43, 45, 46,
+        48, 50, 52, 54, 57, 59, 61, 64, 66, 68, 71, 71, 71, 71, 25, 25, 24, 24,
+        24, 26, 28, 30, 33, 35, 37, 39, 41, 42, 44, 45, 47, 49, 51, 53, 55, 57,
+        59, 61, 64, 66, 68, 70, 73, 73, 73, 73, 27, 26, 26, 25, 25, 27, 29, 32,
+        35, 37, 39, 41, 43, 45, 46, 48, 50, 52, 54, 56, 58, 60, 61, 64, 66, 68,
+        70, 72, 74, 74, 74, 74, 29, 28, 27, 27, 26, 28, 30, 33, 36, 38, 41, 43,
+        46, 48, 49, 51, 53, 55, 57, 59, 61, 62, 64, 66, 68, 70, 72, 74, 76, 76,
+        76, 76, 31, 30, 29, 28, 28, 30, 32, 34, 38, 40, 42, 45, 48, 49, 51, 53,
+        56, 57, 59, 61, 63, 65, 66, 68, 71, 72, 74, 76, 78, 78, 78, 78, 33, 32,
+        31, 30, 29, 31, 33, 36, 39, 41, 44, 46, 49, 51, 53, 55, 58, 60, 61, 63,
+        65, 67, 69, 71, 73, 75, 76, 78, 80, 80, 80, 80, 36, 34, 33, 32, 31, 33,
+        35, 38, 41, 43, 45, 48, 51, 53, 55, 58, 60, 62, 64, 66, 68, 70, 71, 73,
+        75, 77, 79, 80, 82, 82, 82, 82, 39, 37, 36, 34, 33, 35, 37, 40, 42, 45,
+        47, 50, 53, 56, 58, 60, 63, 65, 67, 69, 71, 72, 74, 76, 78, 79, 81, 83,
+        85, 85, 85, 85, 41, 39, 38, 36, 35, 37, 39, 41, 44, 46, 49, 52, 55, 57,
+        60, 62, 65, 67, 69, 71, 73, 74, 76, 78, 80, 81, 83, 85, 86, 86, 86, 86,
+        43, 42, 40, 39, 37, 39, 41, 43, 46, 48, 51, 54, 57, 59, 61, 64, 67, 69,
+        70, 73, 75, 76, 78, 80, 82, 83, 85, 86, 88, 88, 88, 88, 46, 45, 43, 41,
+        40, 42, 43, 46, 48, 50, 53, 56, 59, 61, 63, 66, 69, 71, 73, 75, 77, 79,
+        80, 82, 84, 85, 87, 88, 90, 90, 90, 90, 50, 48, 46, 44, 43, 44, 46, 48,
+        50, 52, 55, 58, 61, 63, 65, 68, 71, 73, 75, 77, 79, 81, 83, 84, 86, 87,
+        89, 90, 92, 92, 92, 92, 52, 50, 48, 46, 45, 46, 48, 50, 52, 54, 57, 60,
+        62, 65, 67, 70, 72, 74, 76, 79, 81, 82, 84, 86, 88, 89, 90, 92, 93, 93,
+        93, 93, 55, 53, 51, 49, 47, 49, 51, 52, 54, 57, 59, 61, 64, 66, 69, 71,
+        74, 76, 78, 80, 83, 84, 86, 88, 89, 91, 92, 94, 95, 95, 95, 95, 58, 56,
+        54, 52, 50, 52, 53, 55, 57, 59, 61, 64, 66, 68, 71, 73, 76, 78, 80, 82,
+        84, 86, 88, 89, 91, 92, 94, 95, 97, 97, 97, 97, 62, 59, 57, 55, 53, 54,
+        56, 58, 59, 61, 64, 66, 68, 71, 73, 75, 78, 80, 82, 84, 86, 88, 89, 91,
+        93, 94, 95, 97, 98, 98, 98, 98, 64, 62, 60, 57, 55, 57, 58, 60, 62, 64,
+        66, 68, 70, 72, 75, 77, 79, 81, 83, 85, 87, 89, 91, 92, 94, 95, 97, 98,
+        99, 99, 99, 99, 67, 65, 62, 60, 58, 59, 61, 62, 64, 66, 68, 70, 72, 74,
+        76, 79, 81, 83, 85, 87, 89, 90, 92, 94, 95, 97, 98, 99, 101, 101, 101,
+        101, 70, 67, 65, 63, 61, 62, 63, 65, 66, 68, 70, 72, 74, 76, 78, 80, 83,
+        85, 86, 88, 90, 92, 94, 95, 97, 98, 99, 101, 102, 102, 102, 102, 73, 71,
+        68, 66, 64, 65, 66, 68, 69, 71, 73, 74, 76, 78, 80, 82, 85, 86, 88, 90,
+        92, 93, 95, 97, 98, 99, 101, 102, 103, 103, 103, 103, 73, 71, 68, 66,
+        64, 65, 66, 68, 69, 71, 73, 74, 76, 78, 80, 82, 85, 86, 88, 90, 92, 93,
+        95, 97, 98, 99, 101, 102, 103, 103, 103, 103, 73, 71, 68, 66, 64, 65,
+        66, 68, 69, 71, 73, 74, 76, 78, 80, 82, 85, 86, 88, 90, 92, 93, 95, 97,
+        98, 99, 101, 102, 103, 103, 103, 103, 73, 71, 68, 66, 64, 65, 66, 68,
+        69, 71, 73, 74, 76, 78, 80, 82, 85, 86, 88, 90, 92, 93, 95, 97, 98, 99,
+        101, 102, 103, 103, 103, 103 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 99, 107, 133, 99, 117, 124, 141, 107, 124, 156, 175, 133, 141, 175,
+        203,
+        /* Size 8 */
+        64, 52, 92, 97, 103, 114, 128, 143, 52, 74, 93, 85, 87, 96, 107, 120,
+        92, 93, 108, 105, 104, 109, 118, 129, 97, 85, 105, 117, 122, 127, 134,
+        143, 103, 87, 104, 122, 135, 144, 151, 159, 114, 96, 109, 127, 144, 156,
+        166, 175, 128, 107, 118, 134, 151, 166, 178, 188, 143, 120, 129, 143,
+        159, 175, 188, 198,
+        /* Size 16 */
+        64, 57, 52, 66, 92, 94, 97, 100, 103, 108, 114, 121, 128, 135, 143, 143,
+        57, 59, 61, 74, 93, 92, 91, 93, 95, 99, 104, 110, 116, 123, 130, 130,
+        52, 61, 74, 83, 93, 89, 85, 86, 87, 91, 96, 101, 107, 113, 120, 120, 66,
+        74, 83, 91, 100, 97, 94, 95, 95, 98, 102, 107, 112, 118, 124, 124, 92,
+        93, 93, 100, 108, 106, 105, 104, 104, 107, 109, 114, 118, 123, 129, 129,
+        94, 92, 89, 97, 106, 108, 111, 112, 113, 115, 118, 122, 126, 131, 136,
+        136, 97, 91, 85, 94, 105, 111, 117, 120, 122, 125, 127, 131, 134, 139,
+        143, 143, 100, 93, 86, 95, 104, 112, 120, 124, 129, 132, 135, 139, 142,
+        146, 151, 151, 103, 95, 87, 95, 104, 113, 122, 129, 135, 139, 144, 147,
+        151, 155, 159, 159, 108, 99, 91, 98, 107, 115, 125, 132, 139, 144, 150,
+        154, 158, 162, 167, 167, 114, 104, 96, 102, 109, 118, 127, 135, 144,
+        150, 156, 161, 166, 170, 175, 175, 121, 110, 101, 107, 114, 122, 131,
+        139, 147, 154, 161, 166, 172, 176, 181, 181, 128, 116, 107, 112, 118,
+        126, 134, 142, 151, 158, 166, 172, 178, 183, 188, 188, 135, 123, 113,
+        118, 123, 131, 139, 146, 155, 162, 170, 176, 183, 188, 193, 193, 143,
+        130, 120, 124, 129, 136, 143, 151, 159, 167, 175, 181, 188, 193, 198,
+        198, 143, 130, 120, 124, 129, 136, 143, 151, 159, 167, 175, 181, 188,
+        193, 198, 198,
+        /* Size 32 */
+        64, 61, 57, 55, 52, 58, 66, 77, 92, 93, 94, 96, 97, 98, 100, 101, 103,
+        106, 108, 111, 114, 117, 121, 124, 128, 131, 135, 139, 143, 143, 143,
+        143, 61, 59, 58, 57, 56, 62, 70, 79, 92, 93, 93, 93, 94, 95, 96, 97, 99,
+        101, 104, 106, 109, 112, 115, 118, 122, 125, 129, 132, 136, 136, 136,
+        136, 57, 58, 59, 60, 61, 67, 74, 82, 93, 92, 92, 91, 91, 92, 93, 94, 95,
+        97, 99, 101, 104, 107, 110, 113, 116, 119, 123, 126, 130, 130, 130, 130,
+        55, 57, 60, 63, 67, 72, 78, 85, 93, 92, 90, 89, 88, 89, 89, 90, 91, 93,
+        95, 97, 100, 102, 105, 108, 111, 114, 118, 121, 125, 125, 125, 125, 52,
+        56, 61, 67, 74, 78, 83, 88, 93, 91, 89, 87, 85, 86, 86, 87, 87, 89, 91,
+        93, 96, 98, 101, 104, 107, 110, 113, 116, 120, 120, 120, 120, 58, 62,
+        67, 72, 78, 82, 86, 91, 97, 95, 93, 91, 89, 90, 90, 91, 91, 93, 95, 97,
+        99, 101, 104, 106, 109, 112, 115, 119, 122, 122, 122, 122, 66, 70, 74,
+        78, 83, 86, 91, 95, 100, 99, 97, 95, 94, 94, 95, 95, 95, 97, 98, 100,
+        102, 104, 107, 109, 112, 115, 118, 121, 124, 124, 124, 124, 77, 79, 82,
+        85, 88, 91, 95, 99, 104, 103, 101, 100, 99, 99, 99, 99, 100, 101, 102,
+        104, 106, 108, 110, 113, 115, 118, 121, 124, 127, 127, 127, 127, 92, 92,
+        93, 93, 93, 97, 100, 104, 108, 107, 106, 105, 105, 105, 104, 104, 104,
+        106, 107, 108, 109, 111, 114, 116, 118, 121, 123, 126, 129, 129, 129,
+        129, 93, 93, 92, 92, 91, 95, 99, 103, 107, 107, 107, 107, 108, 108, 108,
+        108, 108, 110, 111, 112, 113, 115, 117, 120, 122, 124, 127, 130, 133,
+        133, 133, 133, 94, 93, 92, 90, 89, 93, 97, 101, 106, 107, 108, 109, 111,
+        111, 112, 112, 113, 114, 115, 116, 118, 120, 122, 124, 126, 128, 131,
+        133, 136, 136, 136, 136, 96, 93, 91, 89, 87, 91, 95, 100, 105, 107, 109,
+        112, 114, 115, 116, 116, 117, 119, 120, 121, 122, 124, 126, 128, 130,
+        132, 135, 137, 140, 140, 140, 140, 97, 94, 91, 88, 85, 89, 94, 99, 105,
+        108, 111, 114, 117, 118, 120, 121, 122, 124, 125, 126, 127, 129, 131,
+        132, 134, 136, 139, 141, 143, 143, 143, 143, 98, 95, 92, 89, 86, 90, 94,
+        99, 105, 108, 111, 115, 118, 120, 122, 124, 125, 127, 128, 130, 131,
+        133, 134, 136, 138, 140, 142, 145, 147, 147, 147, 147, 100, 96, 93, 89,
+        86, 90, 95, 99, 104, 108, 112, 116, 120, 122, 124, 126, 129, 130, 132,
+        133, 135, 137, 139, 140, 142, 144, 146, 149, 151, 151, 151, 151, 101,
+        97, 94, 90, 87, 91, 95, 99, 104, 108, 112, 116, 121, 124, 126, 129, 132,
+        134, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 155, 155,
+        155, 103, 99, 95, 91, 87, 91, 95, 100, 104, 108, 113, 117, 122, 125,
+        129, 132, 135, 137, 139, 142, 144, 146, 147, 149, 151, 153, 155, 157,
+        159, 159, 159, 159, 106, 101, 97, 93, 89, 93, 97, 101, 106, 110, 114,
+        119, 124, 127, 130, 134, 137, 140, 142, 144, 147, 149, 151, 153, 155,
+        157, 159, 161, 163, 163, 163, 163, 108, 104, 99, 95, 91, 95, 98, 102,
+        107, 111, 115, 120, 125, 128, 132, 135, 139, 142, 144, 147, 150, 152,
+        154, 156, 158, 160, 162, 164, 167, 167, 167, 167, 111, 106, 101, 97, 93,
+        97, 100, 104, 108, 112, 116, 121, 126, 130, 133, 137, 142, 144, 147,
+        150, 153, 155, 157, 160, 162, 164, 166, 168, 170, 170, 170, 170, 114,
+        109, 104, 100, 96, 99, 102, 106, 109, 113, 118, 122, 127, 131, 135, 139,
+        144, 147, 150, 153, 156, 159, 161, 164, 166, 168, 170, 172, 175, 175,
+        175, 175, 117, 112, 107, 102, 98, 101, 104, 108, 111, 115, 120, 124,
+        129, 133, 137, 141, 146, 149, 152, 155, 159, 161, 164, 166, 169, 171,
+        173, 175, 178, 178, 178, 178, 121, 115, 110, 105, 101, 104, 107, 110,
+        114, 117, 122, 126, 131, 134, 139, 143, 147, 151, 154, 157, 161, 164,
+        166, 169, 172, 174, 176, 178, 181, 181, 181, 181, 124, 118, 113, 108,
+        104, 106, 109, 113, 116, 120, 124, 128, 132, 136, 140, 145, 149, 153,
+        156, 160, 164, 166, 169, 172, 175, 177, 179, 182, 184, 184, 184, 184,
+        128, 122, 116, 111, 107, 109, 112, 115, 118, 122, 126, 130, 134, 138,
+        142, 147, 151, 155, 158, 162, 166, 169, 172, 175, 178, 180, 183, 185,
+        188, 188, 188, 188, 131, 125, 119, 114, 110, 112, 115, 118, 121, 124,
+        128, 132, 136, 140, 144, 149, 153, 157, 160, 164, 168, 171, 174, 177,
+        180, 183, 185, 188, 190, 190, 190, 190, 135, 129, 123, 118, 113, 115,
+        118, 121, 123, 127, 131, 135, 139, 142, 146, 151, 155, 159, 162, 166,
+        170, 173, 176, 179, 183, 185, 188, 190, 193, 193, 193, 193, 139, 132,
+        126, 121, 116, 119, 121, 124, 126, 130, 133, 137, 141, 145, 149, 153,
+        157, 161, 164, 168, 172, 175, 178, 182, 185, 188, 190, 193, 196, 196,
+        196, 196, 143, 136, 130, 125, 120, 122, 124, 127, 129, 133, 136, 140,
+        143, 147, 151, 155, 159, 163, 167, 170, 175, 178, 181, 184, 188, 190,
+        193, 196, 198, 198, 198, 198, 143, 136, 130, 125, 120, 122, 124, 127,
+        129, 133, 136, 140, 143, 147, 151, 155, 159, 163, 167, 170, 175, 178,
+        181, 184, 188, 190, 193, 196, 198, 198, 198, 198, 143, 136, 130, 125,
+        120, 122, 124, 127, 129, 133, 136, 140, 143, 147, 151, 155, 159, 163,
+        167, 170, 175, 178, 181, 184, 188, 190, 193, 196, 198, 198, 198, 198,
+        143, 136, 130, 125, 120, 122, 124, 127, 129, 133, 136, 140, 143, 147,
+        151, 155, 159, 163, 167, 170, 175, 178, 181, 184, 188, 190, 193, 196,
+        198, 198, 198, 198 },
+      { /* Intra matrices */
+        /* Size 4 */
+        29, 46, 49, 62, 46, 54, 58, 66, 49, 58, 74, 84, 62, 66, 84, 99,
+        /* Size 8 */
+        31, 25, 45, 48, 51, 57, 64, 72, 25, 36, 46, 42, 43, 47, 53, 60, 45, 46,
+        54, 52, 52, 54, 59, 65, 48, 42, 52, 59, 61, 64, 68, 73, 51, 43, 52, 61,
+        68, 73, 77, 82, 57, 47, 54, 64, 73, 80, 86, 90, 64, 53, 59, 68, 77, 86,
+        92, 98, 72, 60, 65, 73, 82, 90, 98, 104,
+        /* Size 16 */
+        30, 27, 24, 31, 44, 45, 47, 48, 50, 53, 56, 59, 63, 66, 71, 71, 27, 28,
+        29, 35, 44, 44, 43, 44, 45, 48, 50, 53, 57, 60, 64, 64, 24, 29, 35, 39,
+        45, 43, 41, 41, 42, 44, 46, 49, 52, 55, 58, 58, 31, 35, 39, 43, 48, 47,
+        45, 45, 46, 47, 49, 52, 54, 58, 61, 61, 44, 44, 45, 48, 52, 51, 51, 51,
+        50, 52, 53, 55, 58, 60, 63, 63, 45, 44, 43, 47, 51, 53, 54, 54, 55, 56,
+        57, 59, 62, 64, 67, 67, 47, 43, 41, 45, 51, 54, 57, 58, 60, 61, 62, 64,
+        66, 68, 71, 71, 48, 44, 41, 45, 51, 54, 58, 61, 63, 65, 67, 68, 70, 73,
+        75, 75, 50, 45, 42, 46, 50, 55, 60, 63, 67, 69, 71, 73, 75, 77, 80, 80,
+        53, 48, 44, 47, 52, 56, 61, 65, 69, 72, 74, 77, 79, 81, 84, 84, 56, 50,
+        46, 49, 53, 57, 62, 67, 71, 74, 78, 81, 83, 86, 88, 88, 59, 53, 49, 52,
+        55, 59, 64, 68, 73, 77, 81, 83, 87, 89, 92, 92, 63, 57, 52, 54, 58, 62,
+        66, 70, 75, 79, 83, 87, 90, 93, 95, 95, 66, 60, 55, 58, 60, 64, 68, 73,
+        77, 81, 86, 89, 93, 95, 98, 98, 71, 64, 58, 61, 63, 67, 71, 75, 80, 84,
+        88, 92, 95, 98, 102, 102, 71, 64, 58, 61, 63, 67, 71, 75, 80, 84, 88,
+        92, 95, 98, 102, 102,
+        /* Size 32 */
+        30, 28, 27, 25, 24, 27, 31, 36, 43, 44, 45, 45, 46, 47, 48, 48, 49, 50,
+        52, 53, 55, 56, 58, 60, 62, 64, 65, 67, 70, 70, 70, 70, 28, 28, 27, 27,
+        26, 29, 33, 37, 44, 44, 44, 44, 44, 45, 46, 46, 47, 48, 49, 51, 52, 54,
+        55, 57, 59, 60, 62, 64, 66, 66, 66, 66, 27, 27, 28, 28, 28, 31, 35, 39,
+        44, 44, 43, 43, 43, 43, 44, 44, 45, 46, 47, 48, 50, 51, 53, 54, 56, 58,
+        59, 61, 63, 63, 63, 63, 25, 27, 28, 30, 31, 34, 37, 40, 44, 43, 43, 42,
+        41, 42, 42, 43, 43, 44, 45, 46, 47, 49, 50, 52, 53, 55, 57, 58, 60, 60,
+        60, 60, 24, 26, 28, 31, 35, 37, 39, 41, 44, 43, 42, 41, 40, 40, 41, 41,
+        41, 42, 43, 44, 45, 47, 48, 49, 51, 53, 54, 56, 58, 58, 58, 58, 27, 29,
+        31, 34, 37, 39, 41, 43, 46, 45, 44, 43, 42, 42, 43, 43, 43, 44, 45, 46,
+        47, 48, 49, 51, 52, 54, 55, 57, 59, 59, 59, 59, 31, 33, 35, 37, 39, 41,
+        43, 45, 48, 47, 46, 45, 45, 45, 45, 45, 45, 46, 47, 48, 49, 50, 51, 52,
+        54, 55, 57, 58, 60, 60, 60, 60, 36, 37, 39, 40, 41, 43, 45, 47, 50, 49,
+        48, 48, 47, 47, 47, 47, 47, 48, 49, 50, 50, 52, 53, 54, 55, 57, 58, 60,
+        61, 61, 61, 61, 43, 44, 44, 44, 44, 46, 48, 50, 52, 51, 51, 50, 50, 50,
+        50, 50, 50, 50, 51, 52, 52, 53, 54, 56, 57, 58, 60, 61, 63, 63, 63, 63,
+        44, 44, 44, 43, 43, 45, 47, 49, 51, 51, 51, 51, 51, 51, 52, 52, 52, 52,
+        53, 54, 54, 55, 56, 58, 59, 60, 61, 63, 64, 64, 64, 64, 45, 44, 43, 43,
+        42, 44, 46, 48, 51, 51, 52, 52, 53, 53, 53, 54, 54, 55, 55, 56, 57, 58,
+        59, 60, 61, 62, 63, 65, 66, 66, 66, 66, 45, 44, 43, 42, 41, 43, 45, 48,
+        50, 51, 52, 53, 55, 55, 55, 56, 56, 57, 58, 58, 59, 60, 61, 62, 63, 64,
+        65, 67, 68, 68, 68, 68, 46, 44, 43, 41, 40, 42, 45, 47, 50, 51, 53, 55,
+        56, 57, 58, 58, 59, 60, 60, 61, 62, 62, 63, 64, 65, 66, 68, 69, 70, 70,
+        70, 70, 47, 45, 43, 42, 40, 42, 45, 47, 50, 51, 53, 55, 57, 58, 59, 60,
+        61, 61, 62, 63, 64, 64, 65, 66, 67, 68, 70, 71, 72, 72, 72, 72, 48, 46,
+        44, 42, 41, 43, 45, 47, 50, 52, 53, 55, 58, 59, 60, 61, 62, 63, 64, 65,
+        66, 67, 67, 68, 69, 71, 72, 73, 74, 74, 74, 74, 48, 46, 44, 43, 41, 43,
+        45, 47, 50, 52, 54, 56, 58, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71,
+        72, 73, 74, 75, 76, 76, 76, 76, 49, 47, 45, 43, 41, 43, 45, 47, 50, 52,
+        54, 56, 59, 61, 62, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
+        79, 79, 79, 79, 50, 48, 46, 44, 42, 44, 46, 48, 50, 52, 55, 57, 60, 61,
+        63, 65, 67, 68, 69, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 80, 80, 80,
+        52, 49, 47, 45, 43, 45, 47, 49, 51, 53, 55, 58, 60, 62, 64, 66, 68, 69,
+        71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 82, 82, 82, 82, 53, 51, 48, 46,
+        44, 46, 48, 50, 52, 54, 56, 58, 61, 63, 65, 67, 69, 70, 72, 74, 75, 76,
+        78, 79, 80, 81, 82, 83, 85, 85, 85, 85, 55, 52, 50, 47, 45, 47, 49, 50,
+        52, 54, 57, 59, 62, 64, 66, 68, 70, 72, 73, 75, 77, 78, 80, 81, 82, 83,
+        84, 86, 87, 87, 87, 87, 56, 54, 51, 49, 47, 48, 50, 52, 53, 55, 58, 60,
+        62, 64, 67, 69, 71, 73, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 89, 89,
+        89, 89, 58, 55, 53, 50, 48, 49, 51, 53, 54, 56, 59, 61, 63, 65, 67, 70,
+        72, 74, 76, 78, 80, 81, 82, 84, 85, 87, 88, 89, 90, 90, 90, 90, 60, 57,
+        54, 52, 49, 51, 52, 54, 56, 58, 60, 62, 64, 66, 68, 71, 73, 75, 77, 79,
+        81, 82, 84, 85, 87, 88, 90, 91, 92, 92, 92, 92, 62, 59, 56, 53, 51, 52,
+        54, 55, 57, 59, 61, 63, 65, 67, 69, 72, 74, 76, 78, 80, 82, 84, 85, 87,
+        89, 90, 91, 93, 94, 94, 94, 94, 64, 60, 58, 55, 53, 54, 55, 57, 58, 60,
+        62, 64, 66, 68, 71, 73, 75, 77, 79, 81, 83, 85, 87, 88, 90, 91, 93, 94,
+        96, 96, 96, 96, 65, 62, 59, 57, 54, 55, 57, 58, 60, 61, 63, 65, 68, 70,
+        72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 91, 93, 94, 96, 97, 97, 97, 97,
+        67, 64, 61, 58, 56, 57, 58, 60, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+        81, 83, 86, 87, 89, 91, 93, 94, 96, 97, 99, 99, 99, 99, 70, 66, 63, 60,
+        58, 59, 60, 61, 63, 64, 66, 68, 70, 72, 74, 76, 79, 80, 82, 85, 87, 89,
+        90, 92, 94, 96, 97, 99, 100, 100, 100, 100, 70, 66, 63, 60, 58, 59, 60,
+        61, 63, 64, 66, 68, 70, 72, 74, 76, 79, 80, 82, 85, 87, 89, 90, 92, 94,
+        96, 97, 99, 100, 100, 100, 100, 70, 66, 63, 60, 58, 59, 60, 61, 63, 64,
+        66, 68, 70, 72, 74, 76, 79, 80, 82, 85, 87, 89, 90, 92, 94, 96, 97, 99,
+        100, 100, 100, 100, 70, 66, 63, 60, 58, 59, 60, 61, 63, 64, 66, 68, 70,
+        72, 74, 76, 79, 80, 82, 85, 87, 89, 90, 92, 94, 96, 97, 99, 100, 100,
+        100, 100 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 69, 108, 155, 69, 100, 131, 166, 108, 131, 171, 197, 155, 166, 197,
+        218,
+        /* Size 8 */
+        64, 49, 53, 68, 88, 109, 130, 149, 49, 56, 54, 62, 77, 96, 115, 134, 53,
+        54, 73, 83, 95, 110, 126, 142, 68, 62, 83, 102, 116, 128, 141, 154, 88,
+        77, 95, 116, 132, 145, 156, 166, 109, 96, 110, 128, 145, 158, 168, 176,
+        130, 115, 126, 141, 156, 168, 177, 185, 149, 134, 142, 154, 166, 176,
+        185, 191,
+        /* Size 16 */
+        64, 56, 49, 51, 53, 60, 68, 77, 88, 97, 109, 119, 130, 139, 149, 149,
+        56, 54, 52, 53, 53, 59, 65, 73, 82, 91, 102, 111, 122, 131, 141, 141,
+        49, 52, 56, 55, 54, 58, 62, 69, 77, 85, 96, 104, 115, 124, 134, 134, 51,
+        53, 55, 58, 62, 66, 71, 78, 85, 93, 102, 111, 120, 128, 138, 138, 53,
+        53, 54, 62, 73, 78, 83, 89, 95, 102, 110, 117, 126, 134, 142, 142, 60,
+        59, 58, 66, 78, 84, 92, 98, 104, 111, 118, 125, 133, 140, 148, 148, 68,
+        65, 62, 71, 83, 92, 102, 108, 116, 122, 128, 134, 141, 147, 154, 154,
+        77, 73, 69, 78, 89, 98, 108, 115, 123, 129, 136, 142, 148, 153, 159,
+        159, 88, 82, 77, 85, 95, 104, 116, 123, 132, 138, 145, 150, 156, 161,
+        166, 166, 97, 91, 85, 93, 102, 111, 122, 129, 138, 144, 151, 156, 162,
+        166, 171, 171, 109, 102, 96, 102, 110, 118, 128, 136, 145, 151, 158,
+        163, 168, 172, 176, 176, 119, 111, 104, 111, 117, 125, 134, 142, 150,
+        156, 163, 168, 173, 176, 180, 180, 130, 122, 115, 120, 126, 133, 141,
+        148, 156, 162, 168, 173, 177, 181, 185, 185, 139, 131, 124, 128, 134,
+        140, 147, 153, 161, 166, 172, 176, 181, 184, 188, 188, 149, 141, 134,
+        138, 142, 148, 154, 159, 166, 171, 176, 180, 185, 188, 191, 191, 149,
+        141, 134, 138, 142, 148, 154, 159, 166, 171, 176, 180, 185, 188, 191,
+        191,
+        /* Size 32 */
+        64, 60, 56, 52, 49, 50, 51, 52, 53, 56, 60, 64, 68, 72, 77, 82, 88, 92,
+        97, 103, 109, 114, 119, 124, 130, 134, 139, 144, 149, 149, 149, 149, 60,
+        57, 55, 53, 51, 51, 52, 53, 53, 56, 59, 63, 66, 70, 75, 79, 85, 89, 94,
+        100, 106, 110, 115, 120, 126, 130, 135, 140, 145, 145, 145, 145, 56, 55,
+        54, 53, 52, 53, 53, 53, 53, 56, 59, 62, 65, 69, 73, 77, 82, 86, 91, 96,
+        102, 106, 111, 116, 122, 126, 131, 136, 141, 141, 141, 141, 52, 53, 53,
+        54, 54, 54, 54, 54, 54, 56, 58, 61, 64, 67, 71, 75, 80, 84, 88, 93, 99,
+        103, 108, 113, 118, 123, 127, 132, 137, 137, 137, 137, 49, 51, 52, 54,
+        56, 55, 55, 54, 54, 56, 58, 60, 62, 65, 69, 73, 77, 81, 85, 90, 96, 100,
+        104, 109, 115, 119, 124, 128, 134, 134, 134, 134, 50, 51, 53, 54, 55,
+        56, 56, 57, 58, 60, 62, 64, 66, 70, 73, 77, 81, 85, 89, 94, 99, 103,
+        107, 112, 118, 122, 126, 131, 136, 136, 136, 136, 51, 52, 53, 54, 55,
+        56, 58, 60, 62, 64, 66, 69, 71, 74, 78, 81, 85, 89, 93, 97, 102, 106,
+        111, 115, 120, 124, 128, 133, 138, 138, 138, 138, 52, 53, 53, 54, 54,
+        57, 60, 63, 67, 69, 72, 74, 77, 80, 83, 86, 90, 94, 97, 101, 106, 110,
+        114, 118, 123, 127, 131, 135, 140, 140, 140, 140, 53, 53, 53, 54, 54,
+        58, 62, 67, 73, 75, 78, 80, 83, 86, 89, 92, 95, 99, 102, 106, 110, 114,
+        117, 122, 126, 130, 134, 138, 142, 142, 142, 142, 56, 56, 56, 56, 56,
+        60, 64, 69, 75, 78, 81, 84, 87, 90, 93, 96, 100, 103, 106, 110, 114,
+        118, 121, 125, 130, 133, 137, 141, 145, 145, 145, 145, 60, 59, 59, 58,
+        58, 62, 66, 72, 78, 81, 84, 88, 92, 95, 98, 101, 104, 108, 111, 115,
+        118, 122, 125, 129, 133, 136, 140, 144, 148, 148, 148, 148, 64, 63, 62,
+        61, 60, 64, 69, 74, 80, 84, 88, 92, 97, 100, 103, 106, 110, 113, 116,
+        119, 123, 126, 130, 133, 137, 140, 143, 147, 151, 151, 151, 151, 68, 66,
+        65, 64, 62, 66, 71, 77, 83, 87, 92, 97, 102, 105, 108, 112, 116, 119,
+        122, 125, 128, 131, 134, 138, 141, 144, 147, 150, 154, 154, 154, 154,
+        72, 70, 69, 67, 65, 70, 74, 80, 86, 90, 95, 100, 105, 108, 112, 116,
+        119, 122, 125, 129, 132, 135, 138, 141, 144, 147, 150, 153, 156, 156,
+        156, 156, 77, 75, 73, 71, 69, 73, 78, 83, 89, 93, 98, 103, 108, 112,
+        115, 119, 123, 126, 129, 133, 136, 139, 142, 145, 148, 151, 153, 156,
+        159, 159, 159, 159, 82, 79, 77, 75, 73, 77, 81, 86, 92, 96, 101, 106,
+        112, 116, 119, 123, 128, 131, 134, 137, 140, 143, 146, 149, 152, 154,
+        157, 160, 162, 162, 162, 162, 88, 85, 82, 80, 77, 81, 85, 90, 95, 100,
+        104, 110, 116, 119, 123, 128, 132, 135, 138, 141, 145, 147, 150, 153,
+        156, 158, 161, 163, 166, 166, 166, 166, 92, 89, 86, 84, 81, 85, 89, 94,
+        99, 103, 108, 113, 119, 122, 126, 131, 135, 138, 141, 144, 148, 150,
+        153, 156, 159, 161, 163, 166, 168, 168, 168, 168, 97, 94, 91, 88, 85,
+        89, 93, 97, 102, 106, 111, 116, 122, 125, 129, 134, 138, 141, 144, 148,
+        151, 154, 156, 159, 162, 164, 166, 168, 171, 171, 171, 171, 103, 100,
+        96, 93, 90, 94, 97, 101, 106, 110, 115, 119, 125, 129, 133, 137, 141,
+        144, 148, 151, 154, 157, 159, 162, 165, 167, 169, 171, 174, 174, 174,
+        174, 109, 106, 102, 99, 96, 99, 102, 106, 110, 114, 118, 123, 128, 132,
+        136, 140, 145, 148, 151, 154, 158, 160, 163, 165, 168, 170, 172, 174,
+        176, 176, 176, 176, 114, 110, 106, 103, 100, 103, 106, 110, 114, 118,
+        122, 126, 131, 135, 139, 143, 147, 150, 154, 157, 160, 163, 165, 168,
+        170, 172, 174, 176, 178, 178, 178, 178, 119, 115, 111, 108, 104, 107,
+        111, 114, 117, 121, 125, 130, 134, 138, 142, 146, 150, 153, 156, 159,
+        163, 165, 168, 170, 173, 174, 176, 178, 180, 180, 180, 180, 124, 120,
+        116, 113, 109, 112, 115, 118, 122, 125, 129, 133, 138, 141, 145, 149,
+        153, 156, 159, 162, 165, 168, 170, 172, 175, 177, 179, 181, 183, 183,
+        183, 183, 130, 126, 122, 118, 115, 118, 120, 123, 126, 130, 133, 137,
+        141, 144, 148, 152, 156, 159, 162, 165, 168, 170, 173, 175, 177, 179,
+        181, 183, 185, 185, 185, 185, 134, 130, 126, 123, 119, 122, 124, 127,
+        130, 133, 136, 140, 144, 147, 151, 154, 158, 161, 164, 167, 170, 172,
+        174, 177, 179, 181, 183, 185, 186, 186, 186, 186, 139, 135, 131, 127,
+        124, 126, 128, 131, 134, 137, 140, 143, 147, 150, 153, 157, 161, 163,
+        166, 169, 172, 174, 176, 179, 181, 183, 184, 186, 188, 188, 188, 188,
+        144, 140, 136, 132, 128, 131, 133, 135, 138, 141, 144, 147, 150, 153,
+        156, 160, 163, 166, 168, 171, 174, 176, 178, 181, 183, 185, 186, 188,
+        190, 190, 190, 190, 149, 145, 141, 137, 134, 136, 138, 140, 142, 145,
+        148, 151, 154, 156, 159, 162, 166, 168, 171, 174, 176, 178, 180, 183,
+        185, 186, 188, 190, 191, 191, 191, 191, 149, 145, 141, 137, 134, 136,
+        138, 140, 142, 145, 148, 151, 154, 156, 159, 162, 166, 168, 171, 174,
+        176, 178, 180, 183, 185, 186, 188, 190, 191, 191, 191, 191, 149, 145,
+        141, 137, 134, 136, 138, 140, 142, 145, 148, 151, 154, 156, 159, 162,
+        166, 168, 171, 174, 176, 178, 180, 183, 185, 186, 188, 190, 191, 191,
+        191, 191, 149, 145, 141, 137, 134, 136, 138, 140, 142, 145, 148, 151,
+        154, 156, 159, 162, 166, 168, 171, 174, 176, 178, 180, 183, 185, 186,
+        188, 190, 191, 191, 191, 191 },
+      { /* Intra matrices */
+        /* Size 4 */
+        26, 29, 46, 68, 29, 42, 57, 73, 46, 57, 75, 88, 68, 73, 88, 99,
+        /* Size 8 */
+        31, 24, 26, 33, 44, 55, 67, 77, 24, 27, 26, 30, 38, 48, 58, 69, 26, 26,
+        36, 41, 47, 55, 64, 73, 33, 30, 41, 51, 59, 65, 73, 80, 44, 38, 47, 59,
+        68, 75, 81, 87, 55, 48, 55, 65, 75, 82, 88, 93, 67, 58, 64, 73, 81, 88,
+        94, 98, 77, 69, 73, 80, 87, 93, 98, 102,
+        /* Size 16 */
+        30, 26, 23, 24, 25, 28, 32, 37, 42, 47, 53, 58, 64, 69, 75, 75, 26, 25,
+        25, 25, 25, 28, 31, 34, 39, 44, 49, 54, 60, 65, 70, 70, 23, 25, 26, 26,
+        25, 27, 29, 33, 37, 41, 46, 51, 56, 61, 66, 66, 24, 25, 26, 27, 29, 31,
+        34, 37, 41, 45, 50, 54, 59, 63, 69, 69, 25, 25, 25, 29, 35, 37, 40, 43,
+        46, 49, 54, 58, 62, 66, 71, 71, 28, 28, 27, 31, 37, 40, 44, 47, 51, 54,
+        58, 62, 66, 70, 74, 74, 32, 31, 29, 34, 40, 44, 50, 53, 57, 60, 63, 67,
+        70, 74, 77, 77, 37, 34, 33, 37, 43, 47, 53, 57, 61, 64, 68, 71, 74, 77,
+        81, 81, 42, 39, 37, 41, 46, 51, 57, 61, 66, 69, 72, 75, 79, 81, 84, 84,
+        47, 44, 41, 45, 49, 54, 60, 64, 69, 72, 76, 79, 82, 84, 87, 87, 53, 49,
+        46, 50, 54, 58, 63, 68, 72, 76, 80, 83, 85, 88, 90, 90, 58, 54, 51, 54,
+        58, 62, 67, 71, 75, 79, 83, 85, 88, 90, 93, 93, 64, 60, 56, 59, 62, 66,
+        70, 74, 79, 82, 85, 88, 91, 93, 95, 95, 69, 65, 61, 63, 66, 70, 74, 77,
+        81, 84, 88, 90, 93, 95, 97, 97, 75, 70, 66, 69, 71, 74, 77, 81, 84, 87,
+        90, 93, 95, 97, 99, 99, 75, 70, 66, 69, 71, 74, 77, 81, 84, 87, 90, 93,
+        95, 97, 99, 99,
+        /* Size 32 */
+        30, 28, 26, 24, 23, 23, 24, 24, 24, 26, 28, 30, 32, 34, 36, 39, 42, 44,
+        46, 49, 52, 55, 57, 60, 63, 66, 68, 71, 73, 73, 73, 73, 28, 26, 25, 24,
+        23, 24, 24, 24, 25, 26, 27, 29, 31, 33, 35, 37, 40, 42, 45, 47, 50, 53,
+        55, 58, 61, 63, 66, 68, 71, 71, 71, 71, 26, 25, 25, 25, 24, 24, 24, 25,
+        25, 26, 27, 29, 30, 32, 34, 36, 39, 41, 43, 46, 49, 51, 53, 56, 59, 61,
+        64, 66, 69, 69, 69, 69, 24, 24, 25, 25, 25, 25, 25, 25, 25, 26, 27, 28,
+        29, 31, 33, 35, 37, 39, 42, 44, 47, 49, 52, 54, 57, 59, 62, 64, 67, 67,
+        67, 67, 23, 23, 24, 25, 26, 26, 25, 25, 25, 26, 27, 28, 29, 30, 32, 34,
+        36, 38, 40, 43, 45, 48, 50, 52, 55, 58, 60, 62, 65, 65, 65, 65, 23, 24,
+        24, 25, 26, 26, 26, 26, 27, 28, 29, 30, 31, 32, 34, 36, 38, 40, 42, 44,
+        47, 49, 51, 54, 57, 59, 61, 64, 66, 66, 66, 66, 24, 24, 24, 25, 25, 26,
+        27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 42, 44, 46, 49, 51, 53, 55,
+        58, 60, 62, 65, 67, 67, 67, 67, 24, 24, 25, 25, 25, 26, 28, 29, 31, 32,
+        33, 35, 36, 37, 39, 41, 43, 44, 46, 48, 51, 53, 55, 57, 60, 62, 64, 66,
+        69, 69, 69, 69, 24, 25, 25, 25, 25, 27, 29, 31, 34, 35, 36, 38, 39, 41,
+        42, 44, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 70, 70, 70, 70,
+        26, 26, 26, 26, 26, 28, 30, 32, 35, 37, 38, 40, 41, 43, 44, 46, 47, 49,
+        51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 71, 71, 71, 28, 27, 27, 27,
+        27, 29, 31, 33, 36, 38, 40, 41, 43, 45, 46, 48, 50, 52, 53, 55, 57, 59,
+        61, 63, 65, 67, 69, 71, 73, 73, 73, 73, 30, 29, 29, 28, 28, 30, 32, 35,
+        38, 40, 41, 44, 46, 47, 49, 51, 53, 54, 56, 58, 60, 61, 63, 65, 67, 69,
+        71, 72, 74, 74, 74, 74, 32, 31, 30, 29, 29, 31, 33, 36, 39, 41, 43, 46,
+        49, 50, 52, 54, 56, 57, 59, 61, 62, 64, 66, 67, 69, 71, 72, 74, 76, 76,
+        76, 76, 34, 33, 32, 31, 30, 32, 35, 37, 41, 43, 45, 47, 50, 52, 54, 56,
+        58, 59, 61, 63, 64, 66, 68, 69, 71, 73, 74, 76, 78, 78, 78, 78, 36, 35,
+        34, 33, 32, 34, 36, 39, 42, 44, 46, 49, 52, 54, 56, 58, 60, 61, 63, 65,
+        66, 68, 70, 71, 73, 74, 76, 78, 79, 79, 79, 79, 39, 37, 36, 35, 34, 36,
+        38, 41, 44, 46, 48, 51, 54, 56, 58, 60, 62, 64, 65, 67, 69, 70, 72, 73,
+        75, 76, 78, 79, 81, 81, 81, 81, 42, 40, 39, 37, 36, 38, 40, 43, 45, 47,
+        50, 53, 56, 58, 60, 62, 64, 66, 68, 69, 71, 73, 74, 76, 77, 79, 80, 81,
+        83, 83, 83, 83, 44, 42, 41, 39, 38, 40, 42, 44, 47, 49, 52, 54, 57, 59,
+        61, 64, 66, 68, 69, 71, 73, 74, 76, 77, 79, 80, 81, 83, 84, 84, 84, 84,
+        46, 45, 43, 42, 40, 42, 44, 46, 49, 51, 53, 56, 59, 61, 63, 65, 68, 69,
+        71, 73, 75, 76, 77, 79, 81, 82, 83, 84, 86, 86, 86, 86, 49, 47, 46, 44,
+        43, 44, 46, 48, 51, 53, 55, 58, 61, 63, 65, 67, 69, 71, 73, 75, 77, 78,
+        79, 81, 82, 83, 85, 86, 87, 87, 87, 87, 52, 50, 49, 47, 45, 47, 49, 51,
+        53, 55, 57, 60, 62, 64, 66, 69, 71, 73, 75, 77, 78, 80, 81, 83, 84, 85,
+        86, 88, 89, 89, 89, 89, 55, 53, 51, 49, 48, 49, 51, 53, 55, 57, 59, 61,
+        64, 66, 68, 70, 73, 74, 76, 78, 80, 81, 82, 84, 85, 86, 88, 89, 90, 90,
+        90, 90, 57, 55, 53, 52, 50, 51, 53, 55, 57, 59, 61, 63, 66, 68, 70, 72,
+        74, 76, 77, 79, 81, 82, 84, 85, 87, 88, 89, 90, 91, 91, 91, 91, 60, 58,
+        56, 54, 52, 54, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 76, 77, 79, 81,
+        83, 84, 85, 87, 88, 89, 90, 91, 92, 92, 92, 92, 63, 61, 59, 57, 55, 57,
+        58, 60, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 82, 84, 85, 87, 88,
+        89, 90, 92, 93, 94, 94, 94, 94, 66, 63, 61, 59, 58, 59, 60, 62, 63, 65,
+        67, 69, 71, 73, 74, 76, 79, 80, 82, 83, 85, 86, 88, 89, 90, 91, 93, 94,
+        95, 95, 95, 95, 68, 66, 64, 62, 60, 61, 62, 64, 65, 67, 69, 71, 72, 74,
+        76, 78, 80, 81, 83, 85, 86, 88, 89, 90, 92, 93, 94, 95, 96, 96, 96, 96,
+        71, 68, 66, 64, 62, 64, 65, 66, 67, 69, 71, 72, 74, 76, 78, 79, 81, 83,
+        84, 86, 88, 89, 90, 91, 93, 94, 95, 96, 97, 97, 97, 97, 73, 71, 69, 67,
+        65, 66, 67, 69, 70, 71, 73, 74, 76, 78, 79, 81, 83, 84, 86, 87, 89, 90,
+        91, 92, 94, 95, 96, 97, 98, 98, 98, 98, 73, 71, 69, 67, 65, 66, 67, 69,
+        70, 71, 73, 74, 76, 78, 79, 81, 83, 84, 86, 87, 89, 90, 91, 92, 94, 95,
+        96, 97, 98, 98, 98, 98, 73, 71, 69, 67, 65, 66, 67, 69, 70, 71, 73, 74,
+        76, 78, 79, 81, 83, 84, 86, 87, 89, 90, 91, 92, 94, 95, 96, 97, 98, 98,
+        98, 98, 73, 71, 69, 67, 65, 66, 67, 69, 70, 71, 73, 74, 76, 78, 79, 81,
+        83, 84, 86, 87, 89, 90, 91, 92, 94, 95, 96, 97, 98, 98, 98, 98 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 96, 104, 126, 96, 113, 119, 133, 104, 119, 146, 162, 126, 133, 162,
+        183,
+        /* Size 8 */
+        64, 53, 90, 94, 100, 110, 121, 134, 53, 73, 91, 84, 86, 93, 103, 115,
+        90, 91, 104, 101, 101, 105, 113, 123, 94, 84, 101, 112, 117, 121, 127,
+        135, 100, 86, 101, 117, 128, 135, 141, 148, 110, 93, 105, 121, 135, 145,
+        153, 160, 121, 103, 113, 127, 141, 153, 163, 170, 134, 115, 123, 135,
+        148, 160, 170, 179,
+        /* Size 16 */
+        64, 58, 53, 66, 90, 92, 94, 97, 100, 105, 110, 115, 121, 127, 134, 134,
+        58, 59, 61, 73, 90, 90, 89, 90, 92, 96, 101, 106, 112, 117, 123, 123,
+        53, 61, 73, 81, 91, 87, 84, 85, 86, 89, 93, 98, 103, 109, 115, 115, 66,
+        73, 81, 89, 97, 94, 92, 92, 93, 96, 99, 103, 108, 113, 119, 119, 90, 90,
+        91, 97, 104, 103, 101, 101, 101, 103, 105, 109, 113, 118, 123, 123, 92,
+        90, 87, 94, 103, 105, 107, 107, 108, 110, 113, 116, 120, 124, 128, 128,
+        94, 89, 84, 92, 101, 107, 112, 115, 117, 119, 121, 124, 127, 131, 135,
+        135, 97, 90, 85, 92, 101, 107, 115, 118, 122, 125, 128, 131, 134, 137,
+        141, 141, 100, 92, 86, 93, 101, 108, 117, 122, 128, 131, 135, 138, 141,
+        144, 148, 148, 105, 96, 89, 96, 103, 110, 119, 125, 131, 135, 140, 143,
+        147, 150, 154, 154, 110, 101, 93, 99, 105, 113, 121, 128, 135, 140, 145,
+        149, 153, 157, 160, 160, 115, 106, 98, 103, 109, 116, 124, 131, 138,
+        143, 149, 153, 158, 161, 165, 165, 121, 112, 103, 108, 113, 120, 127,
+        134, 141, 147, 153, 158, 163, 166, 170, 170, 127, 117, 109, 113, 118,
+        124, 131, 137, 144, 150, 157, 161, 166, 170, 174, 174, 134, 123, 115,
+        119, 123, 128, 135, 141, 148, 154, 160, 165, 170, 174, 179, 179, 134,
+        123, 115, 119, 123, 128, 135, 141, 148, 154, 160, 165, 170, 174, 179,
+        179,
+        /* Size 32 */
+        64, 61, 58, 55, 53, 59, 66, 76, 90, 91, 92, 93, 94, 96, 97, 98, 100,
+        102, 105, 107, 110, 112, 115, 118, 121, 124, 127, 131, 134, 134, 134,
+        134, 61, 60, 59, 58, 57, 62, 70, 78, 90, 90, 91, 91, 91, 93, 94, 95, 96,
+        98, 100, 103, 105, 108, 110, 113, 116, 119, 122, 125, 129, 129, 129,
+        129, 58, 59, 59, 60, 61, 67, 73, 81, 90, 90, 90, 89, 89, 90, 90, 91, 92,
+        94, 96, 98, 101, 103, 106, 109, 112, 114, 117, 120, 123, 123, 123, 123,
+        55, 58, 60, 63, 67, 72, 77, 83, 91, 90, 88, 87, 86, 87, 87, 88, 89, 91,
+        93, 95, 97, 99, 102, 104, 107, 110, 113, 116, 119, 119, 119, 119, 53,
+        57, 61, 67, 73, 77, 81, 86, 91, 89, 87, 85, 84, 84, 85, 85, 86, 88, 89,
+        91, 93, 95, 98, 100, 103, 106, 109, 111, 115, 115, 115, 115, 59, 62, 67,
+        72, 77, 81, 85, 89, 94, 92, 91, 89, 87, 88, 88, 89, 89, 91, 92, 94, 96,
+        98, 100, 103, 105, 108, 111, 114, 117, 117, 117, 117, 66, 70, 73, 77,
+        81, 85, 89, 93, 97, 96, 94, 93, 92, 92, 92, 92, 93, 94, 96, 97, 99, 101,
+        103, 105, 108, 110, 113, 116, 119, 119, 119, 119, 76, 78, 81, 83, 86,
+        89, 93, 97, 101, 99, 98, 97, 96, 96, 96, 97, 97, 98, 99, 101, 102, 104,
+        106, 108, 110, 113, 115, 118, 121, 121, 121, 121, 90, 90, 90, 91, 91,
+        94, 97, 101, 104, 103, 103, 102, 101, 101, 101, 101, 101, 102, 103, 104,
+        105, 107, 109, 111, 113, 115, 118, 120, 123, 123, 123, 123, 91, 90, 90,
+        90, 89, 92, 96, 99, 103, 104, 104, 104, 104, 104, 104, 104, 105, 106,
+        107, 108, 109, 111, 112, 114, 116, 118, 121, 123, 126, 126, 126, 126,
+        92, 91, 90, 88, 87, 91, 94, 98, 103, 104, 105, 106, 107, 107, 107, 108,
+        108, 109, 110, 112, 113, 114, 116, 118, 120, 122, 124, 126, 128, 128,
+        128, 128, 93, 91, 89, 87, 85, 89, 93, 97, 102, 104, 106, 107, 109, 110,
+        111, 112, 112, 113, 115, 116, 117, 118, 120, 121, 123, 125, 127, 129,
+        131, 131, 131, 131, 94, 91, 89, 86, 84, 87, 92, 96, 101, 104, 107, 109,
+        112, 113, 115, 116, 117, 118, 119, 120, 121, 122, 124, 125, 127, 129,
+        131, 133, 135, 135, 135, 135, 96, 93, 90, 87, 84, 88, 92, 96, 101, 104,
+        107, 110, 113, 115, 116, 118, 119, 121, 122, 123, 124, 126, 127, 129,
+        130, 132, 134, 136, 138, 138, 138, 138, 97, 94, 90, 87, 85, 88, 92, 96,
+        101, 104, 107, 111, 115, 116, 118, 120, 122, 123, 125, 126, 128, 129,
+        131, 132, 134, 135, 137, 139, 141, 141, 141, 141, 98, 95, 91, 88, 85,
+        89, 92, 97, 101, 104, 108, 112, 116, 118, 120, 122, 125, 126, 128, 130,
+        131, 133, 134, 136, 137, 139, 141, 142, 144, 144, 144, 144, 100, 96, 92,
+        89, 86, 89, 93, 97, 101, 105, 108, 112, 117, 119, 122, 125, 128, 130,
+        131, 133, 135, 136, 138, 140, 141, 143, 144, 146, 148, 148, 148, 148,
+        102, 98, 94, 91, 88, 91, 94, 98, 102, 106, 109, 113, 118, 121, 123, 126,
+        130, 131, 133, 135, 137, 139, 141, 142, 144, 146, 147, 149, 151, 151,
+        151, 151, 105, 100, 96, 93, 89, 92, 96, 99, 103, 107, 110, 115, 119,
+        122, 125, 128, 131, 133, 135, 138, 140, 142, 143, 145, 147, 149, 150,
+        152, 154, 154, 154, 154, 107, 103, 98, 95, 91, 94, 97, 101, 104, 108,
+        112, 116, 120, 123, 126, 130, 133, 135, 138, 140, 143, 144, 146, 148,
+        150, 152, 153, 155, 157, 157, 157, 157, 110, 105, 101, 97, 93, 96, 99,
+        102, 105, 109, 113, 117, 121, 124, 128, 131, 135, 137, 140, 143, 145,
+        147, 149, 151, 153, 155, 157, 158, 160, 160, 160, 160, 112, 108, 103,
+        99, 95, 98, 101, 104, 107, 111, 114, 118, 122, 126, 129, 133, 136, 139,
+        142, 144, 147, 149, 151, 153, 156, 157, 159, 161, 162, 162, 162, 162,
+        115, 110, 106, 102, 98, 100, 103, 106, 109, 112, 116, 120, 124, 127,
+        131, 134, 138, 141, 143, 146, 149, 151, 153, 156, 158, 160, 161, 163,
+        165, 165, 165, 165, 118, 113, 109, 104, 100, 103, 105, 108, 111, 114,
+        118, 121, 125, 129, 132, 136, 140, 142, 145, 148, 151, 153, 156, 158,
+        160, 162, 164, 166, 168, 168, 168, 168, 121, 116, 112, 107, 103, 105,
+        108, 110, 113, 116, 120, 123, 127, 130, 134, 137, 141, 144, 147, 150,
+        153, 156, 158, 160, 163, 165, 166, 168, 170, 170, 170, 170, 124, 119,
+        114, 110, 106, 108, 110, 113, 115, 118, 122, 125, 129, 132, 135, 139,
+        143, 146, 149, 152, 155, 157, 160, 162, 165, 166, 168, 170, 172, 172,
+        172, 172, 127, 122, 117, 113, 109, 111, 113, 115, 118, 121, 124, 127,
+        131, 134, 137, 141, 144, 147, 150, 153, 157, 159, 161, 164, 166, 168,
+        170, 172, 174, 174, 174, 174, 131, 125, 120, 116, 111, 114, 116, 118,
+        120, 123, 126, 129, 133, 136, 139, 142, 146, 149, 152, 155, 158, 161,
+        163, 166, 168, 170, 172, 174, 177, 177, 177, 177, 134, 129, 123, 119,
+        115, 117, 119, 121, 123, 126, 128, 131, 135, 138, 141, 144, 148, 151,
+        154, 157, 160, 162, 165, 168, 170, 172, 174, 177, 179, 179, 179, 179,
+        134, 129, 123, 119, 115, 117, 119, 121, 123, 126, 128, 131, 135, 138,
+        141, 144, 148, 151, 154, 157, 160, 162, 165, 168, 170, 172, 174, 177,
+        179, 179, 179, 179, 134, 129, 123, 119, 115, 117, 119, 121, 123, 126,
+        128, 131, 135, 138, 141, 144, 148, 151, 154, 157, 160, 162, 165, 168,
+        170, 172, 174, 177, 179, 179, 179, 179, 134, 129, 123, 119, 115, 117,
+        119, 121, 123, 126, 128, 131, 135, 138, 141, 144, 148, 151, 154, 157,
+        160, 162, 165, 168, 170, 172, 174, 177, 179, 179, 179, 179 },
+      { /* Intra matrices */
+        /* Size 4 */
+        31, 47, 51, 63, 47, 56, 59, 67, 51, 59, 74, 82, 63, 67, 82, 95,
+        /* Size 8 */
+        33, 27, 47, 50, 53, 58, 65, 72, 27, 38, 48, 44, 45, 49, 55, 61, 47, 48,
+        55, 54, 53, 56, 60, 66, 50, 44, 54, 60, 62, 65, 68, 73, 53, 45, 53, 62,
+        69, 73, 76, 80, 58, 49, 56, 65, 73, 79, 84, 88, 65, 55, 60, 68, 76, 84,
+        89, 94, 72, 61, 66, 73, 80, 88, 94, 99,
+        /* Size 16 */
+        32, 29, 26, 33, 46, 47, 48, 50, 51, 54, 57, 60, 63, 67, 70, 70, 29, 30,
+        31, 37, 46, 46, 45, 46, 47, 50, 52, 55, 58, 61, 65, 65, 26, 31, 37, 41,
+        47, 45, 43, 43, 44, 46, 48, 50, 53, 56, 60, 60, 33, 37, 41, 45, 50, 48,
+        47, 47, 48, 49, 51, 53, 56, 59, 62, 62, 46, 46, 47, 50, 54, 53, 52, 52,
+        52, 53, 55, 57, 59, 61, 64, 64, 47, 46, 45, 48, 53, 54, 55, 56, 56, 57,
+        59, 60, 62, 65, 67, 67, 48, 45, 43, 47, 52, 55, 58, 60, 61, 62, 63, 65,
+        67, 69, 71, 71, 50, 46, 43, 47, 52, 56, 60, 62, 64, 65, 67, 69, 70, 72,
+        74, 74, 51, 47, 44, 48, 52, 56, 61, 64, 67, 69, 71, 73, 75, 76, 78, 78,
+        54, 50, 46, 49, 53, 57, 62, 65, 69, 71, 74, 76, 78, 80, 82, 82, 57, 52,
+        48, 51, 55, 59, 63, 67, 71, 74, 77, 79, 82, 84, 86, 86, 60, 55, 50, 53,
+        57, 60, 65, 69, 73, 76, 79, 82, 84, 86, 89, 89, 63, 58, 53, 56, 59, 62,
+        67, 70, 75, 78, 82, 84, 87, 89, 92, 92, 67, 61, 56, 59, 61, 65, 69, 72,
+        76, 80, 84, 86, 89, 92, 94, 94, 70, 65, 60, 62, 64, 67, 71, 74, 78, 82,
+        86, 89, 92, 94, 97, 97, 70, 65, 60, 62, 64, 67, 71, 74, 78, 82, 86, 89,
+        92, 94, 97, 97,
+        /* Size 32 */
+        32, 30, 29, 27, 26, 29, 33, 38, 45, 46, 47, 47, 48, 49, 49, 50, 51, 52,
+        53, 55, 56, 58, 59, 61, 63, 64, 66, 68, 70, 70, 70, 70, 30, 30, 29, 29,
+        28, 31, 35, 39, 46, 46, 46, 46, 46, 47, 47, 48, 49, 50, 51, 52, 54, 55,
+        57, 58, 60, 61, 63, 65, 67, 67, 67, 67, 29, 29, 30, 30, 30, 33, 37, 41,
+        46, 46, 45, 45, 45, 45, 46, 46, 47, 48, 49, 50, 51, 53, 54, 56, 57, 59,
+        60, 62, 64, 64, 64, 64, 27, 29, 30, 32, 33, 36, 39, 42, 46, 45, 45, 44,
+        43, 44, 44, 45, 45, 46, 47, 48, 49, 50, 52, 53, 55, 56, 58, 59, 61, 61,
+        61, 61, 26, 28, 30, 33, 37, 39, 41, 43, 46, 45, 44, 43, 42, 42, 43, 43,
+        43, 44, 45, 46, 47, 48, 50, 51, 53, 54, 56, 57, 59, 59, 59, 59, 29, 31,
+        33, 36, 39, 41, 43, 45, 48, 47, 46, 45, 44, 44, 45, 45, 45, 46, 47, 48,
+        49, 50, 51, 52, 54, 55, 57, 58, 60, 60, 60, 60, 33, 35, 37, 39, 41, 43,
+        45, 47, 49, 49, 48, 47, 46, 47, 47, 47, 47, 48, 49, 49, 50, 51, 53, 54,
+        55, 57, 58, 59, 61, 61, 61, 61, 38, 39, 41, 42, 43, 45, 47, 49, 51, 51,
+        50, 49, 49, 49, 49, 49, 49, 50, 51, 51, 52, 53, 54, 55, 57, 58, 59, 61,
+        62, 62, 62, 62, 45, 46, 46, 46, 46, 48, 49, 51, 53, 53, 52, 52, 52, 52,
+        52, 51, 51, 52, 53, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 63, 63, 63,
+        46, 46, 46, 45, 45, 47, 49, 51, 53, 53, 53, 53, 53, 53, 53, 53, 53, 54,
+        55, 55, 56, 57, 58, 59, 60, 61, 62, 64, 65, 65, 65, 65, 47, 46, 45, 45,
+        44, 46, 48, 50, 52, 53, 53, 54, 54, 55, 55, 55, 55, 56, 57, 57, 58, 59,
+        60, 61, 62, 63, 64, 65, 67, 67, 67, 67, 47, 46, 45, 44, 43, 45, 47, 49,
+        52, 53, 54, 55, 56, 56, 57, 57, 58, 58, 59, 59, 60, 61, 62, 63, 64, 65,
+        66, 67, 68, 68, 68, 68, 48, 46, 45, 43, 42, 44, 46, 49, 52, 53, 54, 56,
+        58, 58, 59, 59, 60, 61, 61, 62, 62, 63, 64, 65, 66, 67, 68, 69, 70, 70,
+        70, 70, 49, 47, 45, 44, 42, 44, 47, 49, 52, 53, 55, 56, 58, 59, 60, 61,
+        62, 62, 63, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 72, 72, 72, 49, 47,
+        46, 44, 43, 45, 47, 49, 52, 53, 55, 57, 59, 60, 61, 62, 63, 64, 64, 65,
+        66, 67, 68, 69, 69, 70, 71, 72, 74, 74, 74, 74, 50, 48, 46, 45, 43, 45,
+        47, 49, 51, 53, 55, 57, 59, 61, 62, 63, 65, 65, 66, 67, 68, 69, 70, 71,
+        72, 72, 73, 74, 75, 75, 75, 75, 51, 49, 47, 45, 43, 45, 47, 49, 51, 53,
+        55, 58, 60, 62, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 76,
+        77, 77, 77, 77, 52, 50, 48, 46, 44, 46, 48, 50, 52, 54, 56, 58, 61, 62,
+        64, 65, 67, 68, 69, 70, 72, 72, 73, 74, 75, 76, 77, 78, 79, 79, 79, 79,
+        53, 51, 49, 47, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 64, 66, 68, 69,
+        70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 81, 81, 81, 55, 52, 50, 48,
+        46, 48, 49, 51, 53, 55, 57, 59, 62, 63, 65, 67, 69, 70, 72, 73, 75, 76,
+        77, 78, 79, 80, 81, 82, 83, 83, 83, 83, 56, 54, 51, 49, 47, 49, 50, 52,
+        54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 73, 75, 76, 77, 78, 79, 81, 82,
+        83, 84, 85, 85, 85, 85, 58, 55, 53, 50, 48, 50, 51, 53, 55, 57, 59, 61,
+        63, 65, 67, 69, 71, 72, 74, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86, 86,
+        86, 86, 59, 57, 54, 52, 50, 51, 53, 54, 56, 58, 60, 62, 64, 66, 68, 70,
+        72, 73, 75, 77, 78, 80, 81, 82, 83, 84, 85, 86, 87, 87, 87, 87, 61, 58,
+        56, 53, 51, 52, 54, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 74, 76, 78,
+        79, 81, 82, 83, 85, 86, 87, 88, 89, 89, 89, 89, 63, 60, 57, 55, 53, 54,
+        55, 57, 58, 60, 62, 64, 66, 68, 69, 72, 74, 75, 77, 79, 81, 82, 83, 85,
+        86, 87, 88, 89, 91, 91, 91, 91, 64, 61, 59, 56, 54, 55, 57, 58, 59, 61,
+        63, 65, 67, 69, 70, 72, 75, 76, 78, 80, 82, 83, 84, 86, 87, 88, 89, 91,
+        92, 92, 92, 92, 66, 63, 60, 58, 56, 57, 58, 59, 61, 62, 64, 66, 68, 70,
+        71, 73, 76, 77, 79, 81, 83, 84, 85, 87, 88, 89, 91, 92, 93, 93, 93, 93,
+        68, 65, 62, 59, 57, 58, 59, 61, 62, 64, 65, 67, 69, 71, 72, 74, 76, 78,
+        80, 82, 84, 85, 86, 88, 89, 91, 92, 93, 94, 94, 94, 94, 70, 67, 64, 61,
+        59, 60, 61, 62, 63, 65, 67, 68, 70, 72, 74, 75, 77, 79, 81, 83, 85, 86,
+        87, 89, 91, 92, 93, 94, 96, 96, 96, 96, 70, 67, 64, 61, 59, 60, 61, 62,
+        63, 65, 67, 68, 70, 72, 74, 75, 77, 79, 81, 83, 85, 86, 87, 89, 91, 92,
+        93, 94, 96, 96, 96, 96, 70, 67, 64, 61, 59, 60, 61, 62, 63, 65, 67, 68,
+        70, 72, 74, 75, 77, 79, 81, 83, 85, 86, 87, 89, 91, 92, 93, 94, 96, 96,
+        96, 96, 70, 67, 64, 61, 59, 60, 61, 62, 63, 65, 67, 68, 70, 72, 74, 75,
+        77, 79, 81, 83, 85, 86, 87, 89, 91, 92, 93, 94, 96, 96, 96, 96 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 69, 104, 143, 69, 97, 124, 152, 104, 124, 156, 176, 143, 152, 176,
+        192,
+        /* Size 8 */
+        64, 50, 54, 68, 86, 104, 122, 137, 50, 56, 54, 62, 76, 93, 109, 125, 54,
+        54, 72, 82, 92, 105, 118, 131, 68, 62, 82, 98, 110, 120, 130, 140, 86,
+        76, 92, 110, 123, 133, 142, 150, 104, 93, 105, 120, 133, 144, 151, 158,
+        122, 109, 118, 130, 142, 151, 158, 164, 137, 125, 131, 140, 150, 158,
+        164, 169,
+        /* Size 16 */
+        64, 56, 50, 52, 54, 60, 68, 76, 86, 94, 104, 112, 122, 129, 137, 137,
+        56, 55, 53, 54, 54, 59, 65, 72, 81, 89, 98, 106, 115, 122, 130, 130, 50,
+        53, 56, 55, 54, 58, 62, 69, 76, 84, 93, 100, 109, 116, 125, 125, 52, 54,
+        55, 59, 62, 66, 71, 77, 83, 90, 98, 105, 114, 120, 128, 128, 54, 54, 54,
+        62, 72, 77, 82, 87, 92, 98, 105, 111, 118, 124, 131, 131, 60, 59, 58,
+        66, 77, 82, 89, 94, 100, 106, 112, 118, 124, 130, 136, 136, 68, 65, 62,
+        71, 82, 89, 98, 104, 110, 115, 120, 125, 130, 135, 140, 140, 76, 72, 69,
+        77, 87, 94, 104, 109, 116, 121, 126, 131, 136, 140, 145, 145, 86, 81,
+        76, 83, 92, 100, 110, 116, 123, 128, 133, 138, 142, 146, 150, 150, 94,
+        89, 84, 90, 98, 106, 115, 121, 128, 133, 138, 142, 146, 150, 153, 153,
+        104, 98, 93, 98, 105, 112, 120, 126, 133, 138, 144, 147, 151, 154, 158,
+        158, 112, 106, 100, 105, 111, 118, 125, 131, 138, 142, 147, 151, 155,
+        158, 161, 161, 122, 115, 109, 114, 118, 124, 130, 136, 142, 146, 151,
+        155, 158, 161, 164, 164, 129, 122, 116, 120, 124, 130, 135, 140, 146,
+        150, 154, 158, 161, 164, 166, 166, 137, 130, 125, 128, 131, 136, 140,
+        145, 150, 153, 158, 161, 164, 166, 169, 169, 137, 130, 125, 128, 131,
+        136, 140, 145, 150, 153, 158, 161, 164, 166, 169, 169,
+        /* Size 32 */
+        64, 60, 56, 53, 50, 51, 52, 53, 54, 57, 60, 64, 68, 72, 76, 80, 86, 90,
+        94, 99, 104, 108, 112, 117, 122, 125, 129, 132, 137, 137, 137, 137, 60,
+        58, 55, 53, 52, 52, 53, 53, 54, 57, 59, 63, 66, 70, 74, 78, 83, 87, 91,
+        96, 101, 105, 109, 113, 118, 122, 125, 129, 133, 133, 133, 133, 56, 55,
+        55, 54, 53, 53, 54, 54, 54, 56, 59, 62, 65, 68, 72, 76, 81, 84, 89, 93,
+        98, 102, 106, 110, 115, 118, 122, 126, 130, 130, 130, 130, 53, 53, 54,
+        54, 55, 55, 55, 54, 54, 56, 59, 61, 64, 67, 70, 74, 78, 82, 86, 90, 95,
+        99, 103, 107, 112, 115, 119, 123, 127, 127, 127, 127, 50, 52, 53, 55,
+        56, 56, 55, 55, 54, 56, 58, 60, 62, 65, 69, 72, 76, 80, 84, 88, 93, 96,
+        100, 104, 109, 113, 116, 120, 125, 125, 125, 125, 51, 52, 53, 55, 56,
+        56, 57, 57, 58, 60, 62, 64, 66, 69, 72, 76, 80, 83, 87, 91, 95, 99, 103,
+        107, 111, 115, 118, 122, 126, 126, 126, 126, 52, 53, 54, 55, 55, 57, 59,
+        60, 62, 64, 66, 68, 71, 73, 77, 80, 83, 87, 90, 94, 98, 102, 105, 109,
+        114, 117, 120, 124, 128, 128, 128, 128, 53, 53, 54, 54, 55, 57, 60, 63,
+        67, 69, 71, 73, 76, 78, 81, 84, 88, 91, 94, 98, 101, 105, 108, 112, 116,
+        119, 122, 126, 129, 129, 129, 129, 54, 54, 54, 54, 54, 58, 62, 67, 72,
+        74, 77, 79, 82, 84, 87, 89, 92, 95, 98, 101, 105, 108, 111, 115, 118,
+        121, 124, 128, 131, 131, 131, 131, 57, 57, 56, 56, 56, 60, 64, 69, 74,
+        77, 79, 82, 85, 88, 90, 93, 96, 99, 102, 105, 108, 111, 114, 118, 121,
+        124, 127, 130, 133, 133, 133, 133, 60, 59, 59, 59, 58, 62, 66, 71, 77,
+        79, 82, 86, 89, 92, 94, 97, 100, 103, 106, 109, 112, 115, 118, 121, 124,
+        127, 130, 133, 136, 136, 136, 136, 64, 63, 62, 61, 60, 64, 68, 73, 79,
+        82, 86, 89, 93, 96, 99, 102, 105, 107, 110, 113, 116, 118, 121, 124,
+        127, 130, 132, 135, 138, 138, 138, 138, 68, 66, 65, 64, 62, 66, 71, 76,
+        82, 85, 89, 93, 98, 101, 104, 107, 110, 112, 115, 117, 120, 122, 125,
+        128, 130, 133, 135, 138, 140, 140, 140, 140, 72, 70, 68, 67, 65, 69, 73,
+        78, 84, 88, 92, 96, 101, 104, 106, 110, 113, 115, 118, 120, 123, 125,
+        128, 130, 133, 135, 138, 140, 143, 143, 143, 143, 76, 74, 72, 70, 69,
+        72, 77, 81, 87, 90, 94, 99, 104, 106, 109, 113, 116, 119, 121, 124, 126,
+        129, 131, 133, 136, 138, 140, 142, 145, 145, 145, 145, 80, 78, 76, 74,
+        72, 76, 80, 84, 89, 93, 97, 102, 107, 110, 113, 116, 120, 122, 124, 127,
+        130, 132, 134, 136, 139, 141, 143, 145, 147, 147, 147, 147, 86, 83, 81,
+        78, 76, 80, 83, 88, 92, 96, 100, 105, 110, 113, 116, 120, 123, 126, 128,
+        131, 133, 135, 138, 140, 142, 144, 146, 148, 150, 150, 150, 150, 90, 87,
+        84, 82, 80, 83, 87, 91, 95, 99, 103, 107, 112, 115, 119, 122, 126, 128,
+        131, 133, 136, 138, 140, 142, 144, 146, 148, 150, 151, 151, 151, 151,
+        94, 91, 89, 86, 84, 87, 90, 94, 98, 102, 106, 110, 115, 118, 121, 124,
+        128, 131, 133, 136, 138, 140, 142, 144, 146, 148, 150, 152, 153, 153,
+        153, 153, 99, 96, 93, 90, 88, 91, 94, 98, 101, 105, 109, 113, 117, 120,
+        124, 127, 131, 133, 136, 138, 141, 143, 145, 147, 149, 150, 152, 154,
+        155, 155, 155, 155, 104, 101, 98, 95, 93, 95, 98, 101, 105, 108, 112,
+        116, 120, 123, 126, 130, 133, 136, 138, 141, 144, 145, 147, 149, 151,
+        153, 154, 156, 158, 158, 158, 158, 108, 105, 102, 99, 96, 99, 102, 105,
+        108, 111, 115, 118, 122, 125, 129, 132, 135, 138, 140, 143, 145, 147,
+        149, 151, 153, 154, 156, 158, 159, 159, 159, 159, 112, 109, 106, 103,
+        100, 103, 105, 108, 111, 114, 118, 121, 125, 128, 131, 134, 138, 140,
+        142, 145, 147, 149, 151, 153, 155, 156, 158, 159, 161, 161, 161, 161,
+        117, 113, 110, 107, 104, 107, 109, 112, 115, 118, 121, 124, 128, 130,
+        133, 136, 140, 142, 144, 147, 149, 151, 153, 155, 157, 158, 159, 161,
+        162, 162, 162, 162, 122, 118, 115, 112, 109, 111, 114, 116, 118, 121,
+        124, 127, 130, 133, 136, 139, 142, 144, 146, 149, 151, 153, 155, 157,
+        158, 160, 161, 162, 164, 164, 164, 164, 125, 122, 118, 115, 113, 115,
+        117, 119, 121, 124, 127, 130, 133, 135, 138, 141, 144, 146, 148, 150,
+        153, 154, 156, 158, 160, 161, 162, 164, 165, 165, 165, 165, 129, 125,
+        122, 119, 116, 118, 120, 122, 124, 127, 130, 132, 135, 138, 140, 143,
+        146, 148, 150, 152, 154, 156, 158, 159, 161, 162, 164, 165, 166, 166,
+        166, 166, 132, 129, 126, 123, 120, 122, 124, 126, 128, 130, 133, 135,
+        138, 140, 142, 145, 148, 150, 152, 154, 156, 158, 159, 161, 162, 164,
+        165, 166, 167, 167, 167, 167, 137, 133, 130, 127, 125, 126, 128, 129,
+        131, 133, 136, 138, 140, 143, 145, 147, 150, 151, 153, 155, 158, 159,
+        161, 162, 164, 165, 166, 167, 169, 169, 169, 169, 137, 133, 130, 127,
+        125, 126, 128, 129, 131, 133, 136, 138, 140, 143, 145, 147, 150, 151,
+        153, 155, 158, 159, 161, 162, 164, 165, 166, 167, 169, 169, 169, 169,
+        137, 133, 130, 127, 125, 126, 128, 129, 131, 133, 136, 138, 140, 143,
+        145, 147, 150, 151, 153, 155, 158, 159, 161, 162, 164, 165, 166, 167,
+        169, 169, 169, 169, 137, 133, 130, 127, 125, 126, 128, 129, 131, 133,
+        136, 138, 140, 143, 145, 147, 150, 151, 153, 155, 158, 159, 161, 162,
+        164, 165, 166, 167, 169, 169, 169, 169 },
+      { /* Intra matrices */
+        /* Size 4 */
+        29, 32, 48, 68, 32, 45, 58, 73, 48, 58, 75, 86, 68, 73, 86, 94,
+        /* Size 8 */
+        34, 26, 28, 36, 46, 57, 67, 77, 26, 30, 29, 33, 41, 50, 60, 69, 28, 29,
+        39, 44, 50, 57, 65, 73, 36, 33, 44, 53, 60, 66, 73, 79, 46, 41, 50, 60,
+        68, 75, 80, 85, 57, 50, 57, 66, 75, 81, 86, 90, 67, 60, 65, 73, 80, 86,
+        90, 94, 77, 69, 73, 79, 85, 90, 94, 97,
+        /* Size 16 */
+        33, 29, 26, 27, 28, 31, 35, 39, 45, 50, 55, 60, 65, 70, 74, 74, 29, 28,
+        27, 27, 28, 30, 34, 37, 42, 47, 52, 56, 62, 66, 71, 71, 26, 27, 29, 28,
+        28, 30, 32, 35, 40, 44, 49, 53, 58, 62, 67, 67, 27, 27, 28, 30, 32, 34,
+        37, 40, 44, 47, 52, 56, 61, 65, 69, 69, 28, 28, 28, 32, 37, 40, 43, 45,
+        49, 52, 56, 59, 64, 67, 71, 71, 31, 30, 30, 34, 40, 43, 47, 50, 53, 56,
+        60, 63, 67, 70, 74, 74, 35, 34, 32, 37, 43, 47, 52, 55, 59, 61, 65, 67,
+        71, 74, 77, 77, 39, 37, 35, 40, 45, 50, 55, 58, 62, 65, 68, 71, 74, 77,
+        79, 79, 45, 42, 40, 44, 49, 53, 59, 62, 66, 69, 72, 75, 78, 80, 82, 82,
+        50, 47, 44, 47, 52, 56, 61, 65, 69, 72, 75, 78, 80, 82, 85, 85, 55, 52,
+        49, 52, 56, 60, 65, 68, 72, 75, 79, 81, 83, 85, 87, 87, 60, 56, 53, 56,
+        59, 63, 67, 71, 75, 78, 81, 83, 85, 87, 89, 89, 65, 62, 58, 61, 64, 67,
+        71, 74, 78, 80, 83, 85, 88, 89, 91, 91, 70, 66, 62, 65, 67, 70, 74, 77,
+        80, 82, 85, 87, 89, 91, 93, 93, 74, 71, 67, 69, 71, 74, 77, 79, 82, 85,
+        87, 89, 91, 93, 94, 94, 74, 71, 67, 69, 71, 74, 77, 79, 82, 85, 87, 89,
+        91, 93, 94, 94,
+        /* Size 32 */
+        33, 30, 28, 27, 25, 26, 26, 27, 27, 29, 30, 32, 35, 37, 39, 41, 44, 47,
+        49, 52, 55, 57, 59, 62, 65, 67, 69, 71, 73, 73, 73, 73, 30, 29, 28, 27,
+        26, 26, 27, 27, 27, 29, 30, 32, 34, 36, 38, 40, 43, 45, 47, 50, 53, 55,
+        57, 60, 63, 65, 67, 69, 71, 71, 71, 71, 28, 28, 28, 27, 27, 27, 27, 27,
+        27, 29, 30, 31, 33, 35, 37, 39, 42, 44, 46, 48, 51, 53, 56, 58, 61, 63,
+        65, 67, 70, 70, 70, 70, 27, 27, 27, 27, 28, 28, 28, 27, 27, 28, 30, 31,
+        32, 34, 36, 38, 40, 42, 44, 47, 50, 52, 54, 56, 59, 61, 63, 65, 68, 68,
+        68, 68, 25, 26, 27, 28, 29, 28, 28, 28, 27, 28, 29, 30, 32, 33, 35, 37,
+        39, 41, 43, 45, 48, 50, 52, 55, 57, 59, 61, 64, 66, 66, 66, 66, 26, 26,
+        27, 28, 28, 29, 29, 29, 29, 30, 31, 33, 34, 35, 37, 39, 41, 43, 45, 47,
+        50, 52, 54, 56, 59, 61, 63, 65, 67, 67, 67, 67, 26, 27, 27, 28, 28, 29,
+        30, 31, 32, 33, 34, 35, 36, 38, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57,
+        60, 62, 64, 66, 68, 68, 68, 68, 27, 27, 27, 27, 28, 29, 31, 32, 34, 35,
+        36, 38, 39, 40, 42, 44, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
+        69, 69, 69, 69, 27, 27, 27, 27, 27, 29, 32, 34, 37, 38, 39, 41, 42, 43,
+        45, 46, 48, 49, 51, 53, 55, 57, 59, 61, 63, 64, 66, 68, 70, 70, 70, 70,
+        29, 29, 29, 28, 28, 30, 33, 35, 38, 39, 41, 42, 44, 45, 47, 48, 50, 52,
+        53, 55, 57, 59, 60, 62, 64, 66, 68, 69, 71, 71, 71, 71, 30, 30, 30, 30,
+        29, 31, 34, 36, 39, 41, 42, 44, 46, 48, 49, 51, 52, 54, 55, 57, 59, 61,
+        62, 64, 66, 68, 69, 71, 73, 73, 73, 73, 32, 32, 31, 31, 30, 33, 35, 38,
+        41, 42, 44, 46, 49, 50, 52, 53, 55, 56, 58, 59, 61, 63, 64, 66, 68, 69,
+        71, 72, 74, 74, 74, 74, 35, 34, 33, 32, 32, 34, 36, 39, 42, 44, 46, 49,
+        51, 53, 54, 56, 58, 59, 60, 62, 64, 65, 66, 68, 70, 71, 72, 74, 75, 75,
+        75, 75, 37, 36, 35, 34, 33, 35, 38, 40, 43, 45, 48, 50, 53, 54, 56, 58,
+        59, 61, 62, 64, 65, 67, 68, 70, 71, 73, 74, 75, 77, 77, 77, 77, 39, 38,
+        37, 36, 35, 37, 39, 42, 45, 47, 49, 52, 54, 56, 58, 59, 61, 63, 64, 66,
+        67, 69, 70, 71, 73, 74, 75, 77, 78, 78, 78, 78, 41, 40, 39, 38, 37, 39,
+        41, 44, 46, 48, 51, 53, 56, 58, 59, 61, 63, 65, 66, 68, 69, 71, 72, 73,
+        75, 76, 77, 78, 80, 80, 80, 80, 44, 43, 42, 40, 39, 41, 43, 45, 48, 50,
+        52, 55, 58, 59, 61, 63, 65, 67, 68, 70, 71, 73, 74, 75, 76, 78, 79, 80,
+        81, 81, 81, 81, 47, 45, 44, 42, 41, 43, 45, 47, 49, 52, 54, 56, 59, 61,
+        63, 65, 67, 68, 70, 71, 73, 74, 75, 76, 78, 79, 80, 81, 82, 82, 82, 82,
+        49, 47, 46, 44, 43, 45, 47, 49, 51, 53, 55, 58, 60, 62, 64, 66, 68, 70,
+        71, 73, 74, 75, 77, 78, 79, 80, 81, 82, 83, 83, 83, 83, 52, 50, 48, 47,
+        45, 47, 49, 51, 53, 55, 57, 59, 62, 64, 66, 68, 70, 71, 73, 74, 76, 77,
+        78, 79, 81, 82, 83, 84, 85, 85, 85, 85, 55, 53, 51, 50, 48, 50, 51, 53,
+        55, 57, 59, 61, 64, 65, 67, 69, 71, 73, 74, 76, 77, 79, 80, 81, 82, 83,
+        84, 85, 86, 86, 86, 86, 57, 55, 53, 52, 50, 52, 53, 55, 57, 59, 61, 63,
+        65, 67, 69, 71, 73, 74, 75, 77, 79, 80, 81, 82, 83, 84, 85, 86, 87, 87,
+        87, 87, 59, 57, 56, 54, 52, 54, 55, 57, 59, 60, 62, 64, 66, 68, 70, 72,
+        74, 75, 77, 78, 80, 81, 82, 83, 84, 85, 86, 87, 88, 88, 88, 88, 62, 60,
+        58, 56, 55, 56, 57, 59, 61, 62, 64, 66, 68, 70, 71, 73, 75, 76, 78, 79,
+        81, 82, 83, 84, 85, 86, 87, 88, 89, 89, 89, 89, 65, 63, 61, 59, 57, 59,
+        60, 61, 63, 64, 66, 68, 70, 71, 73, 75, 76, 78, 79, 81, 82, 83, 84, 85,
+        86, 87, 88, 89, 90, 90, 90, 90, 67, 65, 63, 61, 59, 61, 62, 63, 64, 66,
+        68, 69, 71, 73, 74, 76, 78, 79, 80, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+        90, 90, 90, 90, 69, 67, 65, 63, 61, 63, 64, 65, 66, 68, 69, 71, 72, 74,
+        75, 77, 79, 80, 81, 83, 84, 85, 86, 87, 88, 89, 90, 90, 91, 91, 91, 91,
+        71, 69, 67, 65, 64, 65, 66, 67, 68, 69, 71, 72, 74, 75, 77, 78, 80, 81,
+        82, 84, 85, 86, 87, 88, 89, 90, 90, 91, 92, 92, 92, 92, 73, 71, 70, 68,
+        66, 67, 68, 69, 70, 71, 73, 74, 75, 77, 78, 80, 81, 82, 83, 85, 86, 87,
+        88, 89, 90, 90, 91, 92, 93, 93, 93, 93, 73, 71, 70, 68, 66, 67, 68, 69,
+        70, 71, 73, 74, 75, 77, 78, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 90,
+        91, 92, 93, 93, 93, 93, 73, 71, 70, 68, 66, 67, 68, 69, 70, 71, 73, 74,
+        75, 77, 78, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 90, 91, 92, 93, 93,
+        93, 93, 73, 71, 70, 68, 66, 67, 68, 69, 70, 71, 73, 74, 75, 77, 78, 80,
+        81, 82, 83, 85, 86, 87, 88, 89, 90, 90, 91, 92, 93, 93, 93, 93 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 94, 100, 120, 94, 108, 114, 126, 100, 114, 136, 149, 120, 126, 149,
+        166,
+        /* Size 8 */
+        64, 53, 88, 92, 97, 105, 115, 126, 53, 73, 89, 82, 84, 91, 100, 109, 88,
+        89, 100, 98, 98, 102, 108, 116, 92, 82, 98, 107, 111, 115, 120, 126, 97,
+        84, 98, 111, 121, 127, 132, 137, 105, 91, 102, 115, 127, 135, 142, 147,
+        115, 100, 108, 120, 132, 142, 149, 155, 126, 109, 116, 126, 137, 147,
+        155, 162,
+        /* Size 16 */
+        64, 58, 53, 66, 88, 90, 92, 94, 97, 101, 105, 110, 115, 120, 126, 126,
+        58, 60, 61, 72, 88, 87, 87, 88, 90, 94, 97, 102, 107, 112, 117, 117, 53,
+        61, 73, 80, 89, 85, 82, 83, 84, 87, 91, 95, 100, 104, 109, 109, 66, 72,
+        80, 87, 94, 92, 89, 90, 90, 93, 96, 100, 104, 108, 113, 113, 88, 88, 89,
+        94, 100, 99, 98, 98, 98, 100, 102, 105, 108, 112, 116, 116, 90, 87, 85,
+        92, 99, 101, 102, 103, 104, 106, 108, 111, 114, 117, 121, 121, 92, 87,
+        82, 89, 98, 102, 107, 109, 111, 113, 115, 117, 120, 123, 126, 126, 94,
+        88, 83, 90, 98, 103, 109, 113, 116, 118, 121, 123, 126, 128, 132, 132,
+        97, 90, 84, 90, 98, 104, 111, 116, 121, 124, 127, 129, 132, 134, 137,
+        137, 101, 94, 87, 93, 100, 106, 113, 118, 124, 127, 131, 134, 136, 139,
+        142, 142, 105, 97, 91, 96, 102, 108, 115, 121, 127, 131, 135, 138, 142,
+        144, 147, 147, 110, 102, 95, 100, 105, 111, 117, 123, 129, 134, 138,
+        142, 145, 148, 151, 151, 115, 107, 100, 104, 108, 114, 120, 126, 132,
+        136, 142, 145, 149, 152, 155, 155, 120, 112, 104, 108, 112, 117, 123,
+        128, 134, 139, 144, 148, 152, 155, 158, 158, 126, 117, 109, 113, 116,
+        121, 126, 132, 137, 142, 147, 151, 155, 158, 162, 162, 126, 117, 109,
+        113, 116, 121, 126, 132, 137, 142, 147, 151, 155, 158, 162, 162,
+        /* Size 32 */
+        64, 61, 58, 56, 53, 59, 66, 75, 88, 89, 90, 91, 92, 93, 94, 95, 97, 99,
+        101, 103, 105, 108, 110, 113, 115, 118, 120, 123, 126, 126, 126, 126,
+        61, 60, 59, 58, 57, 63, 69, 77, 88, 88, 89, 89, 89, 90, 91, 92, 93, 95,
+        97, 99, 101, 103, 106, 108, 111, 113, 116, 118, 121, 121, 121, 121, 58,
+        59, 60, 61, 61, 67, 72, 80, 88, 88, 87, 87, 87, 88, 88, 89, 90, 92, 94,
+        95, 97, 100, 102, 104, 107, 109, 112, 114, 117, 117, 117, 117, 56, 58,
+        61, 63, 67, 71, 76, 82, 89, 88, 86, 85, 84, 85, 86, 86, 87, 89, 90, 92,
+        94, 96, 98, 101, 103, 105, 108, 110, 113, 113, 113, 113, 53, 57, 61, 67,
+        73, 76, 80, 84, 89, 87, 85, 84, 82, 83, 83, 84, 84, 86, 87, 89, 91, 93,
+        95, 97, 100, 102, 104, 107, 109, 109, 109, 109, 59, 63, 67, 71, 76, 80,
+        83, 87, 92, 90, 88, 87, 86, 86, 86, 87, 87, 89, 90, 92, 93, 95, 97, 99,
+        102, 104, 106, 109, 111, 111, 111, 111, 66, 69, 72, 76, 80, 83, 87, 90,
+        94, 93, 92, 91, 89, 90, 90, 90, 90, 92, 93, 94, 96, 98, 100, 102, 104,
+        106, 108, 110, 113, 113, 113, 113, 75, 77, 80, 82, 84, 87, 90, 94, 97,
+        96, 95, 94, 93, 94, 94, 94, 94, 95, 96, 97, 99, 100, 102, 104, 106, 108,
+        110, 112, 115, 115, 115, 115, 88, 88, 88, 89, 89, 92, 94, 97, 100, 100,
+        99, 99, 98, 98, 98, 98, 98, 99, 100, 101, 102, 103, 105, 106, 108, 110,
+        112, 114, 116, 116, 116, 116, 89, 88, 88, 88, 87, 90, 93, 96, 100, 100,
+        100, 100, 100, 100, 100, 101, 101, 102, 103, 104, 105, 106, 108, 109,
+        111, 113, 115, 117, 119, 119, 119, 119, 90, 89, 87, 86, 85, 88, 92, 95,
+        99, 100, 101, 102, 102, 103, 103, 104, 104, 105, 106, 107, 108, 109,
+        111, 112, 114, 116, 117, 119, 121, 121, 121, 121, 91, 89, 87, 85, 84,
+        87, 91, 94, 99, 100, 102, 103, 105, 106, 106, 107, 108, 109, 109, 110,
+        111, 113, 114, 115, 117, 118, 120, 122, 124, 124, 124, 124, 92, 89, 87,
+        84, 82, 86, 89, 93, 98, 100, 102, 105, 107, 108, 109, 110, 111, 112,
+        113, 114, 115, 116, 117, 119, 120, 122, 123, 125, 126, 126, 126, 126,
+        93, 90, 88, 85, 83, 86, 90, 94, 98, 100, 103, 106, 108, 110, 111, 112,
+        114, 115, 116, 117, 118, 119, 120, 121, 123, 124, 126, 127, 129, 129,
+        129, 129, 94, 91, 88, 86, 83, 86, 90, 94, 98, 100, 103, 106, 109, 111,
+        113, 114, 116, 117, 118, 119, 121, 122, 123, 124, 126, 127, 128, 130,
+        132, 132, 132, 132, 95, 92, 89, 86, 84, 87, 90, 94, 98, 101, 104, 107,
+        110, 112, 114, 116, 118, 119, 121, 122, 124, 125, 126, 127, 129, 130,
+        131, 133, 134, 134, 134, 134, 97, 93, 90, 87, 84, 87, 90, 94, 98, 101,
+        104, 108, 111, 114, 116, 118, 121, 122, 124, 125, 127, 128, 129, 130,
+        132, 133, 134, 136, 137, 137, 137, 137, 99, 95, 92, 89, 86, 89, 92, 95,
+        99, 102, 105, 109, 112, 115, 117, 119, 122, 124, 125, 127, 129, 130,
+        131, 133, 134, 135, 137, 138, 139, 139, 139, 139, 101, 97, 94, 90, 87,
+        90, 93, 96, 100, 103, 106, 109, 113, 116, 118, 121, 124, 125, 127, 129,
+        131, 132, 134, 135, 136, 138, 139, 140, 142, 142, 142, 142, 103, 99, 95,
+        92, 89, 92, 94, 97, 101, 104, 107, 110, 114, 117, 119, 122, 125, 127,
+        129, 131, 133, 134, 136, 137, 139, 140, 142, 143, 144, 144, 144, 144,
+        105, 101, 97, 94, 91, 93, 96, 99, 102, 105, 108, 111, 115, 118, 121,
+        124, 127, 129, 131, 133, 135, 137, 138, 140, 142, 143, 144, 146, 147,
+        147, 147, 147, 108, 103, 100, 96, 93, 95, 98, 100, 103, 106, 109, 113,
+        116, 119, 122, 125, 128, 130, 132, 134, 137, 138, 140, 142, 143, 145,
+        146, 148, 149, 149, 149, 149, 110, 106, 102, 98, 95, 97, 100, 102, 105,
+        108, 111, 114, 117, 120, 123, 126, 129, 131, 134, 136, 138, 140, 142,
+        143, 145, 147, 148, 149, 151, 151, 151, 151, 113, 108, 104, 101, 97, 99,
+        102, 104, 106, 109, 112, 115, 119, 121, 124, 127, 130, 133, 135, 137,
+        140, 142, 143, 145, 147, 149, 150, 151, 153, 153, 153, 153, 115, 111,
+        107, 103, 100, 102, 104, 106, 108, 111, 114, 117, 120, 123, 126, 129,
+        132, 134, 136, 139, 142, 143, 145, 147, 149, 151, 152, 154, 155, 155,
+        155, 155, 118, 113, 109, 105, 102, 104, 106, 108, 110, 113, 116, 118,
+        122, 124, 127, 130, 133, 135, 138, 140, 143, 145, 147, 149, 151, 152,
+        154, 155, 157, 157, 157, 157, 120, 116, 112, 108, 104, 106, 108, 110,
+        112, 115, 117, 120, 123, 126, 128, 131, 134, 137, 139, 142, 144, 146,
+        148, 150, 152, 154, 155, 157, 158, 158, 158, 158, 123, 118, 114, 110,
+        107, 109, 110, 112, 114, 117, 119, 122, 125, 127, 130, 133, 136, 138,
+        140, 143, 146, 148, 149, 151, 154, 155, 157, 158, 160, 160, 160, 160,
+        126, 121, 117, 113, 109, 111, 113, 115, 116, 119, 121, 124, 126, 129,
+        132, 134, 137, 139, 142, 144, 147, 149, 151, 153, 155, 157, 158, 160,
+        162, 162, 162, 162, 126, 121, 117, 113, 109, 111, 113, 115, 116, 119,
+        121, 124, 126, 129, 132, 134, 137, 139, 142, 144, 147, 149, 151, 153,
+        155, 157, 158, 160, 162, 162, 162, 162, 126, 121, 117, 113, 109, 111,
+        113, 115, 116, 119, 121, 124, 126, 129, 132, 134, 137, 139, 142, 144,
+        147, 149, 151, 153, 155, 157, 158, 160, 162, 162, 162, 162, 126, 121,
+        117, 113, 109, 111, 113, 115, 116, 119, 121, 124, 126, 129, 132, 134,
+        137, 139, 142, 144, 147, 149, 151, 153, 155, 157, 158, 160, 162, 162,
+        162, 162 },
+      { /* Intra matrices */
+        /* Size 4 */
+        33, 49, 53, 64, 49, 57, 60, 67, 53, 60, 73, 81, 64, 67, 81, 91,
+        /* Size 8 */
+        35, 29, 49, 51, 54, 59, 65, 72, 29, 40, 50, 46, 47, 51, 56, 62, 49, 50,
+        56, 55, 55, 57, 61, 66, 51, 46, 55, 61, 63, 65, 68, 72, 54, 47, 55, 63,
+        69, 72, 76, 79, 59, 51, 57, 65, 72, 78, 82, 85, 65, 56, 61, 68, 76, 82,
+        86, 90, 72, 62, 66, 72, 79, 85, 90, 94,
+        /* Size 16 */
+        34, 31, 28, 36, 48, 49, 50, 52, 53, 55, 58, 61, 64, 67, 70, 70, 31, 32,
+        33, 39, 48, 48, 47, 48, 49, 51, 53, 56, 59, 62, 65, 65, 28, 33, 39, 43,
+        49, 47, 45, 45, 46, 48, 50, 52, 55, 57, 61, 61, 36, 39, 43, 47, 52, 50,
+        49, 49, 49, 51, 53, 55, 57, 60, 63, 63, 48, 48, 49, 52, 55, 55, 54, 54,
+        54, 55, 56, 58, 60, 62, 65, 65, 49, 48, 47, 50, 55, 55, 56, 57, 57, 58,
+        60, 61, 63, 65, 68, 68, 50, 47, 45, 49, 54, 56, 59, 61, 62, 63, 64, 65,
+        67, 69, 71, 71, 52, 48, 45, 49, 54, 57, 61, 62, 64, 66, 67, 69, 70, 72,
+        74, 74, 53, 49, 46, 49, 54, 57, 62, 64, 67, 69, 71, 72, 74, 76, 77, 77,
+        55, 51, 48, 51, 55, 58, 63, 66, 69, 71, 73, 75, 77, 78, 80, 80, 58, 53,
+        50, 53, 56, 60, 64, 67, 71, 73, 76, 78, 80, 82, 83, 83, 61, 56, 52, 55,
+        58, 61, 65, 69, 72, 75, 78, 80, 82, 84, 86, 86, 64, 59, 55, 57, 60, 63,
+        67, 70, 74, 77, 80, 82, 85, 86, 88, 88, 67, 62, 57, 60, 62, 65, 69, 72,
+        76, 78, 82, 84, 86, 88, 90, 90, 70, 65, 61, 63, 65, 68, 71, 74, 77, 80,
+        83, 86, 88, 90, 92, 92, 70, 65, 61, 63, 65, 68, 71, 74, 77, 80, 83, 86,
+        88, 90, 92, 92,
+        /* Size 32 */
+        34, 32, 31, 29, 28, 31, 35, 40, 47, 48, 48, 49, 50, 50, 51, 52, 52, 54,
+        55, 56, 57, 59, 60, 62, 63, 65, 66, 68, 70, 70, 70, 70, 32, 32, 31, 31,
+        30, 33, 37, 42, 47, 48, 48, 48, 48, 49, 49, 50, 50, 52, 53, 54, 55, 56,
+        58, 59, 61, 62, 64, 65, 67, 67, 67, 67, 31, 31, 32, 32, 33, 35, 39, 43,
+        48, 47, 47, 47, 47, 47, 48, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 60,
+        61, 63, 64, 64, 64, 64, 29, 31, 32, 34, 35, 38, 41, 44, 48, 47, 47, 46,
+        45, 46, 46, 47, 47, 48, 49, 50, 51, 52, 53, 55, 56, 57, 59, 60, 62, 62,
+        62, 62, 28, 30, 33, 35, 39, 41, 43, 45, 48, 47, 46, 45, 44, 44, 45, 45,
+        45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 57, 58, 60, 60, 60, 60, 31, 33,
+        35, 38, 41, 43, 45, 47, 50, 49, 48, 47, 46, 46, 47, 47, 47, 48, 49, 50,
+        50, 52, 53, 54, 55, 57, 58, 59, 61, 61, 61, 61, 35, 37, 39, 41, 43, 45,
+        47, 49, 51, 50, 50, 49, 48, 48, 49, 49, 49, 50, 50, 51, 52, 53, 54, 55,
+        57, 58, 59, 60, 62, 62, 62, 62, 40, 42, 43, 44, 45, 47, 49, 51, 53, 52,
+        52, 51, 51, 51, 51, 51, 51, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62,
+        63, 63, 63, 63, 47, 47, 48, 48, 48, 50, 51, 53, 55, 54, 54, 54, 53, 53,
+        53, 53, 53, 54, 54, 55, 55, 56, 57, 58, 59, 60, 61, 63, 64, 64, 64, 64,
+        48, 48, 47, 47, 47, 49, 50, 52, 54, 54, 54, 54, 54, 55, 55, 55, 55, 55,
+        56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 65, 65, 65, 48, 48, 47, 47,
+        46, 48, 50, 52, 54, 54, 55, 55, 56, 56, 56, 56, 57, 57, 58, 58, 59, 60,
+        61, 61, 62, 63, 65, 66, 67, 67, 67, 67, 49, 48, 47, 46, 45, 47, 49, 51,
+        54, 54, 55, 56, 57, 58, 58, 58, 59, 59, 60, 60, 61, 62, 63, 63, 64, 65,
+        66, 67, 68, 68, 68, 68, 50, 48, 47, 45, 44, 46, 48, 51, 53, 54, 56, 57,
+        59, 59, 60, 60, 61, 62, 62, 63, 63, 64, 65, 65, 66, 67, 68, 69, 70, 70,
+        70, 70, 50, 49, 47, 46, 44, 46, 48, 51, 53, 55, 56, 58, 59, 60, 61, 62,
+        62, 63, 63, 64, 65, 65, 66, 67, 68, 69, 69, 70, 71, 71, 71, 71, 51, 49,
+        48, 46, 45, 47, 49, 51, 53, 55, 56, 58, 60, 61, 62, 63, 64, 64, 65, 66,
+        66, 67, 68, 69, 69, 70, 71, 72, 73, 73, 73, 73, 52, 50, 48, 47, 45, 47,
+        49, 51, 53, 55, 56, 58, 60, 62, 63, 64, 65, 66, 67, 67, 68, 69, 70, 70,
+        71, 72, 73, 74, 75, 75, 75, 75, 52, 50, 49, 47, 45, 47, 49, 51, 53, 55,
+        57, 59, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 72, 73, 74, 75, 76,
+        76, 76, 76, 76, 54, 52, 50, 48, 46, 48, 50, 51, 54, 55, 57, 59, 62, 63,
+        64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 78, 78, 78,
+        55, 53, 51, 49, 47, 49, 50, 52, 54, 56, 58, 60, 62, 63, 65, 67, 68, 69,
+        70, 71, 73, 73, 74, 75, 76, 77, 78, 78, 79, 79, 79, 79, 56, 54, 52, 50,
+        48, 50, 51, 53, 55, 56, 58, 60, 63, 64, 66, 67, 69, 70, 71, 73, 74, 75,
+        76, 77, 77, 78, 79, 80, 81, 81, 81, 81, 57, 55, 53, 51, 49, 50, 52, 54,
+        55, 57, 59, 61, 63, 65, 66, 68, 70, 71, 73, 74, 75, 76, 77, 78, 79, 80,
+        81, 82, 82, 82, 82, 82, 59, 56, 54, 52, 50, 52, 53, 55, 56, 58, 60, 62,
+        64, 65, 67, 69, 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84,
+        84, 84, 60, 58, 55, 53, 51, 53, 54, 56, 57, 59, 61, 63, 65, 66, 68, 70,
+        72, 73, 74, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 85, 85, 85, 62, 59,
+        57, 55, 53, 54, 55, 57, 58, 60, 61, 63, 65, 67, 69, 70, 72, 74, 75, 77,
+        78, 79, 80, 81, 82, 83, 84, 85, 86, 86, 86, 86, 63, 61, 58, 56, 54, 55,
+        57, 58, 59, 61, 62, 64, 66, 68, 69, 71, 73, 75, 76, 77, 79, 80, 81, 82,
+        84, 85, 86, 86, 87, 87, 87, 87, 65, 62, 60, 57, 55, 57, 58, 59, 60, 62,
+        63, 65, 67, 69, 70, 72, 74, 75, 77, 78, 80, 81, 82, 83, 85, 86, 86, 87,
+        88, 88, 88, 88, 66, 64, 61, 59, 57, 58, 59, 60, 61, 63, 65, 66, 68, 69,
+        71, 73, 75, 76, 78, 79, 81, 82, 83, 84, 86, 86, 87, 88, 89, 89, 89, 89,
+        68, 65, 63, 60, 58, 59, 60, 62, 63, 64, 66, 67, 69, 70, 72, 74, 76, 77,
+        78, 80, 82, 83, 84, 85, 86, 87, 88, 89, 90, 90, 90, 90, 70, 67, 64, 62,
+        60, 61, 62, 63, 64, 65, 67, 68, 70, 71, 73, 75, 76, 78, 79, 81, 82, 84,
+        85, 86, 87, 88, 89, 90, 91, 91, 91, 91, 70, 67, 64, 62, 60, 61, 62, 63,
+        64, 65, 67, 68, 70, 71, 73, 75, 76, 78, 79, 81, 82, 84, 85, 86, 87, 88,
+        89, 90, 91, 91, 91, 91, 70, 67, 64, 62, 60, 61, 62, 63, 64, 65, 67, 68,
+        70, 71, 73, 75, 76, 78, 79, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91, 91,
+        91, 91, 70, 67, 64, 62, 60, 61, 62, 63, 64, 65, 67, 68, 70, 71, 73, 75,
+        76, 78, 79, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91, 91, 91, 91 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 69, 100, 132, 69, 93, 117, 139, 100, 117, 142, 157, 132, 139, 157,
+        169,
+        /* Size 8 */
+        64, 51, 54, 67, 84, 100, 114, 126, 51, 57, 55, 62, 75, 89, 104, 116, 54,
+        55, 71, 80, 89, 100, 111, 121, 67, 62, 80, 94, 104, 113, 121, 129, 84,
+        75, 89, 104, 115, 123, 130, 136, 100, 89, 100, 113, 123, 131, 137, 142,
+        114, 104, 111, 121, 130, 137, 142, 146, 126, 116, 121, 129, 136, 142,
+        146, 150,
+        /* Size 16 */
+        64, 57, 51, 53, 54, 60, 67, 75, 84, 91, 100, 106, 114, 119, 126, 126,
+        57, 55, 54, 54, 55, 59, 65, 71, 79, 86, 94, 101, 108, 114, 121, 121, 51,
+        54, 57, 56, 55, 58, 62, 68, 75, 82, 89, 96, 104, 109, 116, 116, 53, 54,
+        56, 59, 62, 66, 70, 75, 82, 88, 94, 100, 107, 113, 119, 119, 54, 55, 55,
+        62, 71, 76, 80, 84, 89, 94, 100, 105, 111, 116, 121, 121, 60, 59, 58,
+        66, 76, 81, 87, 91, 96, 101, 106, 111, 116, 120, 125, 125, 67, 65, 62,
+        70, 80, 87, 94, 99, 104, 108, 113, 116, 121, 125, 129, 129, 75, 71, 68,
+        75, 84, 91, 99, 104, 109, 113, 118, 121, 125, 128, 132, 132, 84, 79, 75,
+        82, 89, 96, 104, 109, 115, 119, 123, 126, 130, 133, 136, 136, 91, 86,
+        82, 88, 94, 101, 108, 113, 119, 123, 127, 130, 133, 136, 139, 139, 100,
+        94, 89, 94, 100, 106, 113, 118, 123, 127, 131, 134, 137, 139, 142, 142,
+        106, 101, 96, 100, 105, 111, 116, 121, 126, 130, 134, 137, 140, 142,
+        144, 144, 114, 108, 104, 107, 111, 116, 121, 125, 130, 133, 137, 140,
+        142, 144, 146, 146, 119, 114, 109, 113, 116, 120, 125, 128, 133, 136,
+        139, 142, 144, 146, 148, 148, 126, 121, 116, 119, 121, 125, 129, 132,
+        136, 139, 142, 144, 146, 148, 150, 150, 126, 121, 116, 119, 121, 125,
+        129, 132, 136, 139, 142, 144, 146, 148, 150, 150,
+        /* Size 32 */
+        64, 60, 57, 54, 51, 52, 53, 54, 54, 57, 60, 64, 67, 71, 75, 79, 84, 87,
+        91, 95, 100, 103, 106, 110, 114, 117, 119, 122, 126, 126, 126, 126, 60,
+        58, 56, 54, 52, 53, 53, 54, 55, 57, 60, 63, 66, 69, 73, 77, 81, 85, 88,
+        92, 97, 100, 103, 107, 111, 114, 117, 120, 123, 123, 123, 123, 57, 56,
+        55, 55, 54, 54, 54, 54, 55, 57, 59, 62, 65, 68, 71, 75, 79, 82, 86, 90,
+        94, 97, 101, 104, 108, 111, 114, 117, 121, 121, 121, 121, 54, 54, 55,
+        55, 55, 55, 55, 55, 55, 57, 59, 61, 64, 66, 70, 73, 77, 80, 84, 88, 92,
+        95, 98, 102, 106, 109, 112, 115, 118, 118, 118, 118, 51, 52, 54, 55, 57,
+        56, 56, 56, 55, 57, 58, 60, 62, 65, 68, 71, 75, 78, 82, 85, 89, 93, 96,
+        100, 104, 106, 109, 113, 116, 116, 116, 116, 52, 53, 54, 55, 56, 57, 57,
+        58, 58, 60, 62, 64, 66, 69, 72, 75, 78, 81, 84, 88, 92, 95, 98, 102,
+        105, 108, 111, 114, 117, 117, 117, 117, 53, 53, 54, 55, 56, 57, 59, 61,
+        62, 64, 66, 68, 70, 73, 75, 78, 82, 84, 88, 91, 94, 97, 100, 104, 107,
+        110, 113, 116, 119, 119, 119, 119, 54, 54, 54, 55, 56, 58, 61, 63, 67,
+        68, 70, 72, 75, 77, 80, 82, 85, 88, 91, 94, 97, 100, 103, 106, 109, 112,
+        114, 117, 120, 120, 120, 120, 54, 55, 55, 55, 55, 58, 62, 67, 71, 73,
+        76, 78, 80, 82, 84, 87, 89, 92, 94, 97, 100, 103, 105, 108, 111, 114,
+        116, 119, 121, 121, 121, 121, 57, 57, 57, 57, 57, 60, 64, 68, 73, 76,
+        78, 80, 83, 85, 88, 90, 92, 95, 97, 100, 103, 105, 108, 111, 113, 116,
+        118, 121, 123, 123, 123, 123, 60, 60, 59, 59, 58, 62, 66, 70, 76, 78,
+        81, 84, 87, 89, 91, 93, 96, 98, 101, 103, 106, 108, 111, 113, 116, 118,
+        120, 122, 125, 125, 125, 125, 64, 63, 62, 61, 60, 64, 68, 72, 78, 80,
+        84, 87, 90, 92, 95, 97, 100, 102, 104, 107, 109, 111, 113, 116, 118,
+        120, 122, 124, 127, 127, 127, 127, 67, 66, 65, 64, 62, 66, 70, 75, 80,
+        83, 87, 90, 94, 97, 99, 101, 104, 106, 108, 110, 113, 114, 116, 119,
+        121, 123, 125, 127, 129, 129, 129, 129, 71, 69, 68, 66, 65, 69, 73, 77,
+        82, 85, 89, 92, 97, 99, 101, 104, 107, 109, 111, 113, 115, 117, 119,
+        121, 123, 125, 126, 128, 130, 130, 130, 130, 75, 73, 71, 70, 68, 72, 75,
+        80, 84, 88, 91, 95, 99, 101, 104, 106, 109, 111, 113, 115, 118, 119,
+        121, 123, 125, 127, 128, 130, 132, 132, 132, 132, 79, 77, 75, 73, 71,
+        75, 78, 82, 87, 90, 93, 97, 101, 104, 106, 109, 112, 114, 116, 118, 120,
+        122, 124, 126, 127, 129, 131, 132, 134, 134, 134, 134, 84, 81, 79, 77,
+        75, 78, 82, 85, 89, 92, 96, 100, 104, 107, 109, 112, 115, 117, 119, 121,
+        123, 125, 126, 128, 130, 131, 133, 134, 136, 136, 136, 136, 87, 85, 82,
+        80, 78, 81, 84, 88, 92, 95, 98, 102, 106, 109, 111, 114, 117, 119, 121,
+        123, 125, 127, 128, 130, 132, 133, 134, 136, 137, 137, 137, 137, 91, 88,
+        86, 84, 82, 84, 88, 91, 94, 97, 101, 104, 108, 111, 113, 116, 119, 121,
+        123, 125, 127, 129, 130, 132, 133, 135, 136, 137, 139, 139, 139, 139,
+        95, 92, 90, 88, 85, 88, 91, 94, 97, 100, 103, 107, 110, 113, 115, 118,
+        121, 123, 125, 127, 129, 131, 132, 134, 135, 136, 138, 139, 140, 140,
+        140, 140, 100, 97, 94, 92, 89, 92, 94, 97, 100, 103, 106, 109, 113, 115,
+        118, 120, 123, 125, 127, 129, 131, 133, 134, 135, 137, 138, 139, 141,
+        142, 142, 142, 142, 103, 100, 97, 95, 93, 95, 97, 100, 103, 105, 108,
+        111, 114, 117, 119, 122, 125, 127, 129, 131, 133, 134, 135, 137, 138,
+        139, 141, 142, 143, 143, 143, 143, 106, 103, 101, 98, 96, 98, 100, 103,
+        105, 108, 111, 113, 116, 119, 121, 124, 126, 128, 130, 132, 134, 135,
+        137, 138, 140, 141, 142, 143, 144, 144, 144, 144, 110, 107, 104, 102,
+        100, 102, 104, 106, 108, 111, 113, 116, 119, 121, 123, 126, 128, 130,
+        132, 134, 135, 137, 138, 140, 141, 142, 143, 144, 145, 145, 145, 145,
+        114, 111, 108, 106, 104, 105, 107, 109, 111, 113, 116, 118, 121, 123,
+        125, 127, 130, 132, 133, 135, 137, 138, 140, 141, 142, 143, 144, 145,
+        146, 146, 146, 146, 117, 114, 111, 109, 106, 108, 110, 112, 114, 116,
+        118, 120, 123, 125, 127, 129, 131, 133, 135, 136, 138, 139, 141, 142,
+        143, 144, 145, 146, 147, 147, 147, 147, 119, 117, 114, 112, 109, 111,
+        113, 114, 116, 118, 120, 122, 125, 126, 128, 131, 133, 134, 136, 138,
+        139, 141, 142, 143, 144, 145, 146, 147, 148, 148, 148, 148, 122, 120,
+        117, 115, 113, 114, 116, 117, 119, 121, 122, 124, 127, 128, 130, 132,
+        134, 136, 137, 139, 141, 142, 143, 144, 145, 146, 147, 148, 149, 149,
+        149, 149, 126, 123, 121, 118, 116, 117, 119, 120, 121, 123, 125, 127,
+        129, 130, 132, 134, 136, 137, 139, 140, 142, 143, 144, 145, 146, 147,
+        148, 149, 150, 150, 150, 150, 126, 123, 121, 118, 116, 117, 119, 120,
+        121, 123, 125, 127, 129, 130, 132, 134, 136, 137, 139, 140, 142, 143,
+        144, 145, 146, 147, 148, 149, 150, 150, 150, 150, 126, 123, 121, 118,
+        116, 117, 119, 120, 121, 123, 125, 127, 129, 130, 132, 134, 136, 137,
+        139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 150, 150, 150,
+        126, 123, 121, 118, 116, 117, 119, 120, 121, 123, 125, 127, 129, 130,
+        132, 134, 136, 137, 139, 140, 142, 143, 144, 145, 146, 147, 148, 149,
+        150, 150, 150, 150 },
+      { /* Intra matrices */
+        /* Size 4 */
+        32, 34, 51, 69, 34, 47, 60, 73, 51, 60, 75, 83, 69, 73, 83, 90,
+        /* Size 8 */
+        37, 29, 31, 39, 49, 59, 68, 76, 29, 33, 31, 36, 44, 53, 61, 70, 31, 31,
+        41, 47, 52, 59, 66, 73, 39, 36, 47, 56, 62, 67, 73, 78, 49, 44, 52, 62,
+        69, 74, 79, 83, 59, 53, 59, 67, 74, 79, 83, 87, 68, 61, 66, 73, 79, 83,
+        87, 90, 76, 70, 73, 78, 83, 87, 90, 92,
+        /* Size 16 */
+        36, 32, 28, 29, 30, 34, 38, 42, 48, 52, 57, 61, 66, 70, 74, 74, 32, 31,
+        30, 30, 30, 33, 36, 40, 45, 49, 54, 58, 63, 67, 71, 71, 28, 30, 32, 31,
+        31, 33, 35, 38, 42, 46, 51, 55, 60, 64, 68, 68, 29, 30, 31, 33, 35, 37,
+        40, 43, 46, 50, 54, 58, 62, 66, 69, 69, 30, 30, 31, 35, 40, 43, 45, 48,
+        51, 54, 58, 61, 65, 68, 71, 71, 34, 33, 33, 37, 43, 46, 49, 52, 55, 58,
+        61, 64, 68, 70, 73, 73, 38, 36, 35, 40, 45, 49, 54, 57, 60, 63, 65, 68,
+        71, 73, 76, 76, 42, 40, 38, 43, 48, 52, 57, 60, 63, 66, 69, 71, 74, 76,
+        78, 78, 48, 45, 42, 46, 51, 55, 60, 63, 67, 70, 72, 74, 77, 78, 80, 80,
+        52, 49, 46, 50, 54, 58, 63, 66, 70, 72, 75, 77, 79, 81, 82, 82, 57, 54,
+        51, 54, 58, 61, 65, 69, 72, 75, 77, 79, 81, 83, 84, 84, 61, 58, 55, 58,
+        61, 64, 68, 71, 74, 77, 79, 81, 83, 84, 86, 86, 66, 63, 60, 62, 65, 68,
+        71, 74, 77, 79, 81, 83, 85, 86, 87, 87, 70, 67, 64, 66, 68, 70, 73, 76,
+        78, 81, 83, 84, 86, 87, 89, 89, 74, 71, 68, 69, 71, 73, 76, 78, 80, 82,
+        84, 86, 87, 89, 90, 90, 74, 71, 68, 69, 71, 73, 76, 78, 80, 82, 84, 86,
+        87, 89, 90, 90,
+        /* Size 32 */
+        35, 33, 31, 30, 28, 28, 29, 29, 30, 31, 33, 35, 37, 39, 42, 44, 47, 49,
+        51, 54, 57, 59, 61, 63, 65, 67, 69, 71, 73, 73, 73, 73, 33, 32, 31, 30,
+        29, 29, 29, 30, 30, 31, 33, 35, 37, 39, 41, 43, 46, 48, 50, 52, 55, 57,
+        59, 61, 64, 65, 67, 69, 71, 71, 71, 71, 31, 31, 30, 30, 30, 30, 30, 30,
+        30, 31, 33, 34, 36, 38, 40, 42, 44, 46, 48, 51, 53, 55, 57, 60, 62, 64,
+        66, 68, 70, 70, 70, 70, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 32, 34,
+        35, 37, 39, 41, 43, 45, 47, 49, 52, 54, 56, 58, 61, 62, 64, 66, 68, 68,
+        68, 68, 28, 29, 30, 30, 31, 31, 31, 31, 30, 31, 32, 33, 34, 36, 38, 40,
+        42, 44, 46, 48, 50, 52, 54, 57, 59, 61, 63, 65, 67, 67, 67, 67, 28, 29,
+        30, 30, 31, 31, 32, 32, 32, 33, 34, 35, 37, 38, 40, 42, 44, 46, 47, 50,
+        52, 54, 56, 58, 60, 62, 64, 66, 68, 68, 68, 68, 29, 29, 30, 30, 31, 32,
+        33, 33, 34, 35, 37, 38, 39, 40, 42, 44, 46, 47, 49, 51, 53, 55, 57, 59,
+        61, 63, 65, 67, 68, 68, 68, 68, 29, 30, 30, 30, 31, 32, 33, 35, 37, 38,
+        39, 40, 42, 43, 45, 46, 48, 50, 51, 53, 55, 57, 59, 60, 62, 64, 66, 67,
+        69, 69, 69, 69, 30, 30, 30, 30, 30, 32, 34, 37, 40, 41, 42, 43, 45, 46,
+        47, 49, 50, 52, 53, 55, 57, 58, 60, 62, 64, 65, 67, 68, 70, 70, 70, 70,
+        31, 31, 31, 31, 31, 33, 35, 38, 41, 42, 44, 45, 47, 48, 49, 51, 52, 54,
+        55, 57, 59, 60, 62, 63, 65, 67, 68, 70, 71, 71, 71, 71, 33, 33, 33, 32,
+        32, 34, 37, 39, 42, 44, 45, 47, 49, 50, 51, 53, 54, 56, 57, 59, 60, 62,
+        63, 65, 67, 68, 69, 71, 72, 72, 72, 72, 35, 35, 34, 34, 33, 35, 38, 40,
+        43, 45, 47, 49, 51, 52, 54, 55, 57, 58, 59, 61, 62, 64, 65, 67, 68, 69,
+        71, 72, 74, 74, 74, 74, 37, 37, 36, 35, 34, 37, 39, 42, 45, 47, 49, 51,
+        53, 55, 56, 58, 59, 61, 62, 63, 65, 66, 67, 68, 70, 71, 72, 73, 75, 75,
+        75, 75, 39, 39, 38, 37, 36, 38, 40, 43, 46, 48, 50, 52, 55, 56, 58, 59,
+        61, 62, 63, 65, 66, 67, 69, 70, 71, 72, 73, 75, 76, 76, 76, 76, 42, 41,
+        40, 39, 38, 40, 42, 45, 47, 49, 51, 54, 56, 58, 59, 61, 63, 64, 65, 66,
+        68, 69, 70, 71, 73, 74, 75, 76, 77, 77, 77, 77, 44, 43, 42, 41, 40, 42,
+        44, 46, 49, 51, 53, 55, 58, 59, 61, 63, 64, 66, 67, 68, 69, 71, 72, 73,
+        74, 75, 76, 77, 78, 78, 78, 78, 47, 46, 44, 43, 42, 44, 46, 48, 50, 52,
+        54, 57, 59, 61, 63, 64, 66, 67, 69, 70, 71, 72, 73, 74, 76, 76, 77, 78,
+        79, 79, 79, 79, 49, 48, 46, 45, 44, 46, 47, 50, 52, 54, 56, 58, 61, 62,
+        64, 66, 67, 69, 70, 71, 73, 73, 75, 76, 77, 78, 78, 79, 80, 80, 80, 80,
+        51, 50, 48, 47, 46, 47, 49, 51, 53, 55, 57, 59, 62, 63, 65, 67, 69, 70,
+        71, 72, 74, 75, 76, 77, 78, 79, 79, 80, 81, 81, 81, 81, 54, 52, 51, 49,
+        48, 50, 51, 53, 55, 57, 59, 61, 63, 65, 66, 68, 70, 71, 72, 74, 75, 76,
+        77, 78, 79, 80, 81, 81, 82, 82, 82, 82, 57, 55, 53, 52, 50, 52, 53, 55,
+        57, 59, 60, 62, 65, 66, 68, 69, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81,
+        82, 82, 83, 83, 83, 83, 59, 57, 55, 54, 52, 54, 55, 57, 58, 60, 62, 64,
+        66, 67, 69, 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 82, 82, 83, 84, 84,
+        84, 84, 61, 59, 57, 56, 54, 56, 57, 59, 60, 62, 63, 65, 67, 69, 70, 72,
+        73, 75, 76, 77, 78, 79, 80, 81, 82, 83, 83, 84, 85, 85, 85, 85, 63, 61,
+        60, 58, 57, 58, 59, 60, 62, 63, 65, 67, 68, 70, 71, 73, 74, 76, 77, 78,
+        79, 80, 81, 82, 83, 83, 84, 85, 85, 85, 85, 85, 65, 64, 62, 61, 59, 60,
+        61, 62, 64, 65, 67, 68, 70, 71, 73, 74, 76, 77, 78, 79, 80, 81, 82, 83,
+        84, 84, 85, 86, 86, 86, 86, 86, 67, 65, 64, 62, 61, 62, 63, 64, 65, 67,
+        68, 69, 71, 72, 74, 75, 76, 78, 79, 80, 81, 82, 83, 83, 84, 85, 86, 86,
+        87, 87, 87, 87, 69, 67, 66, 64, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73,
+        75, 76, 77, 78, 79, 81, 82, 82, 83, 84, 85, 86, 86, 87, 87, 87, 87, 87,
+        71, 69, 68, 66, 65, 66, 67, 67, 68, 70, 71, 72, 73, 75, 76, 77, 78, 79,
+        80, 81, 82, 83, 84, 85, 86, 86, 87, 87, 88, 88, 88, 88, 73, 71, 70, 68,
+        67, 68, 68, 69, 70, 71, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
+        85, 85, 86, 87, 87, 88, 89, 89, 89, 89, 73, 71, 70, 68, 67, 68, 68, 69,
+        70, 71, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 85, 86, 87,
+        87, 88, 89, 89, 89, 89, 73, 71, 70, 68, 67, 68, 68, 69, 70, 71, 72, 74,
+        75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 85, 86, 87, 87, 88, 89, 89,
+        89, 89, 73, 71, 70, 68, 67, 68, 68, 69, 70, 71, 72, 74, 75, 76, 77, 78,
+        79, 80, 81, 82, 83, 84, 85, 85, 86, 87, 87, 88, 89, 89, 89, 89 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 91, 97, 113, 91, 104, 108, 119, 97, 108, 127, 138, 113, 119, 138,
+        151,
+        /* Size 8 */
+        64, 54, 86, 89, 93, 101, 109, 118, 54, 72, 87, 81, 82, 88, 96, 104, 86,
+        87, 97, 95, 94, 98, 103, 110, 89, 81, 95, 103, 106, 109, 113, 119, 93,
+        82, 94, 106, 114, 119, 123, 127, 101, 88, 98, 109, 119, 126, 131, 135,
+        109, 96, 103, 113, 123, 131, 137, 142, 118, 104, 110, 119, 127, 135,
+        142, 147,
+        /* Size 16 */
+        64, 58, 54, 66, 86, 87, 89, 91, 93, 97, 101, 105, 109, 114, 118, 118,
+        58, 60, 62, 72, 86, 85, 85, 86, 88, 91, 94, 98, 102, 106, 111, 111, 54,
+        62, 72, 79, 87, 84, 81, 81, 82, 85, 88, 92, 96, 100, 104, 104, 66, 72,
+        79, 85, 91, 89, 87, 87, 88, 90, 93, 96, 100, 103, 107, 107, 86, 86, 87,
+        91, 97, 96, 95, 94, 94, 96, 98, 100, 103, 107, 110, 110, 87, 85, 84, 89,
+        96, 97, 98, 99, 100, 101, 103, 106, 108, 111, 114, 114, 89, 85, 81, 87,
+        95, 98, 103, 104, 106, 108, 109, 111, 113, 116, 119, 119, 91, 86, 81,
+        87, 94, 99, 104, 107, 110, 112, 114, 116, 118, 120, 123, 123, 93, 88,
+        82, 88, 94, 100, 106, 110, 114, 116, 119, 121, 123, 125, 127, 127, 97,
+        91, 85, 90, 96, 101, 108, 112, 116, 119, 122, 124, 127, 129, 131, 131,
+        101, 94, 88, 93, 98, 103, 109, 114, 119, 122, 126, 128, 131, 133, 135,
+        135, 105, 98, 92, 96, 100, 106, 111, 116, 121, 124, 128, 131, 134, 136,
+        138, 138, 109, 102, 96, 100, 103, 108, 113, 118, 123, 127, 131, 134,
+        137, 139, 142, 142, 114, 106, 100, 103, 107, 111, 116, 120, 125, 129,
+        133, 136, 139, 142, 144, 144, 118, 111, 104, 107, 110, 114, 119, 123,
+        127, 131, 135, 138, 142, 144, 147, 147, 118, 111, 104, 107, 110, 114,
+        119, 123, 127, 131, 135, 138, 142, 144, 147, 147,
+        /* Size 32 */
+        64, 61, 58, 56, 54, 59, 66, 75, 86, 86, 87, 88, 89, 90, 91, 92, 93, 95,
+        97, 99, 101, 103, 105, 107, 109, 111, 114, 116, 118, 118, 118, 118, 61,
+        60, 59, 58, 57, 63, 69, 76, 86, 86, 86, 87, 87, 88, 89, 90, 90, 92, 94,
+        96, 97, 99, 101, 103, 106, 108, 110, 112, 114, 114, 114, 114, 58, 59,
+        60, 61, 62, 66, 72, 78, 86, 86, 85, 85, 85, 85, 86, 87, 88, 89, 91, 92,
+        94, 96, 98, 100, 102, 104, 106, 109, 111, 111, 111, 111, 56, 58, 61, 63,
+        66, 71, 75, 80, 86, 85, 84, 84, 83, 83, 84, 84, 85, 86, 88, 89, 91, 93,
+        95, 97, 99, 101, 103, 105, 108, 108, 108, 108, 54, 57, 62, 66, 72, 75,
+        79, 83, 87, 85, 84, 82, 81, 81, 81, 82, 82, 84, 85, 87, 88, 90, 92, 94,
+        96, 98, 100, 102, 104, 104, 104, 104, 59, 63, 66, 71, 75, 78, 82, 85,
+        89, 88, 86, 85, 84, 84, 84, 85, 85, 86, 88, 89, 90, 92, 94, 96, 98, 100,
+        102, 104, 106, 106, 106, 106, 66, 69, 72, 75, 79, 82, 85, 88, 91, 90,
+        89, 88, 87, 87, 87, 88, 88, 89, 90, 91, 93, 94, 96, 98, 100, 101, 103,
+        105, 107, 107, 107, 107, 75, 76, 78, 80, 83, 85, 88, 91, 94, 93, 92, 91,
+        91, 91, 91, 91, 91, 92, 93, 94, 95, 97, 98, 100, 101, 103, 105, 107,
+        109, 109, 109, 109, 86, 86, 86, 86, 87, 89, 91, 94, 97, 96, 96, 95, 95,
+        94, 94, 94, 94, 95, 96, 97, 98, 99, 100, 102, 103, 105, 107, 109, 110,
+        110, 110, 110, 86, 86, 86, 85, 85, 88, 90, 93, 96, 96, 96, 96, 96, 97,
+        97, 97, 97, 98, 99, 99, 100, 102, 103, 104, 106, 107, 109, 111, 112,
+        112, 112, 112, 87, 86, 85, 84, 84, 86, 89, 92, 96, 96, 97, 98, 98, 99,
+        99, 100, 100, 101, 101, 102, 103, 104, 106, 107, 108, 110, 111, 113,
+        114, 114, 114, 114, 88, 87, 85, 84, 82, 85, 88, 91, 95, 96, 98, 99, 101,
+        101, 102, 102, 103, 104, 104, 105, 106, 107, 108, 109, 111, 112, 113,
+        115, 116, 116, 116, 116, 89, 87, 85, 83, 81, 84, 87, 91, 95, 96, 98,
+        101, 103, 104, 104, 105, 106, 107, 108, 108, 109, 110, 111, 112, 113,
+        115, 116, 117, 119, 119, 119, 119, 90, 88, 85, 83, 81, 84, 87, 91, 94,
+        97, 99, 101, 104, 105, 106, 107, 108, 109, 110, 110, 111, 112, 113, 114,
+        116, 117, 118, 119, 121, 121, 121, 121, 91, 89, 86, 84, 81, 84, 87, 91,
+        94, 97, 99, 102, 104, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116,
+        117, 118, 119, 120, 122, 123, 123, 123, 123, 92, 90, 87, 84, 82, 85, 88,
+        91, 94, 97, 100, 102, 105, 107, 108, 110, 112, 113, 114, 115, 116, 117,
+        118, 119, 120, 122, 123, 124, 125, 125, 125, 125, 93, 90, 88, 85, 82,
+        85, 88, 91, 94, 97, 100, 103, 106, 108, 110, 112, 114, 115, 116, 118,
+        119, 120, 121, 122, 123, 124, 125, 126, 127, 127, 127, 127, 95, 92, 89,
+        86, 84, 86, 89, 92, 95, 98, 101, 104, 107, 109, 111, 113, 115, 116, 118,
+        119, 120, 122, 123, 124, 125, 126, 127, 128, 129, 129, 129, 129, 97, 94,
+        91, 88, 85, 88, 90, 93, 96, 99, 101, 104, 108, 110, 112, 114, 116, 118,
+        119, 121, 122, 123, 124, 126, 127, 128, 129, 130, 131, 131, 131, 131,
+        99, 96, 92, 89, 87, 89, 91, 94, 97, 99, 102, 105, 108, 110, 113, 115,
+        118, 119, 121, 122, 124, 125, 126, 128, 129, 130, 131, 132, 133, 133,
+        133, 133, 101, 97, 94, 91, 88, 90, 93, 95, 98, 100, 103, 106, 109, 111,
+        114, 116, 119, 120, 122, 124, 126, 127, 128, 130, 131, 132, 133, 134,
+        135, 135, 135, 135, 103, 99, 96, 93, 90, 92, 94, 97, 99, 102, 104, 107,
+        110, 112, 115, 117, 120, 122, 123, 125, 127, 128, 130, 131, 132, 133,
+        134, 136, 137, 137, 137, 137, 105, 101, 98, 95, 92, 94, 96, 98, 100,
+        103, 106, 108, 111, 113, 116, 118, 121, 123, 124, 126, 128, 130, 131,
+        132, 134, 135, 136, 137, 138, 138, 138, 138, 107, 103, 100, 97, 94, 96,
+        98, 100, 102, 104, 107, 109, 112, 114, 117, 119, 122, 124, 126, 128,
+        130, 131, 132, 134, 135, 136, 138, 139, 140, 140, 140, 140, 109, 106,
+        102, 99, 96, 98, 100, 101, 103, 106, 108, 111, 113, 116, 118, 120, 123,
+        125, 127, 129, 131, 132, 134, 135, 137, 138, 139, 140, 142, 142, 142,
+        142, 111, 108, 104, 101, 98, 100, 101, 103, 105, 107, 110, 112, 115,
+        117, 119, 122, 124, 126, 128, 130, 132, 133, 135, 136, 138, 139, 140,
+        142, 143, 143, 143, 143, 114, 110, 106, 103, 100, 102, 103, 105, 107,
+        109, 111, 113, 116, 118, 120, 123, 125, 127, 129, 131, 133, 134, 136,
+        138, 139, 140, 142, 143, 144, 144, 144, 144, 116, 112, 109, 105, 102,
+        104, 105, 107, 109, 111, 113, 115, 117, 119, 122, 124, 126, 128, 130,
+        132, 134, 136, 137, 139, 140, 142, 143, 144, 145, 145, 145, 145, 118,
+        114, 111, 108, 104, 106, 107, 109, 110, 112, 114, 116, 119, 121, 123,
+        125, 127, 129, 131, 133, 135, 137, 138, 140, 142, 143, 144, 145, 147,
+        147, 147, 147, 118, 114, 111, 108, 104, 106, 107, 109, 110, 112, 114,
+        116, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 138, 140, 142,
+        143, 144, 145, 147, 147, 147, 147, 118, 114, 111, 108, 104, 106, 107,
+        109, 110, 112, 114, 116, 119, 121, 123, 125, 127, 129, 131, 133, 135,
+        137, 138, 140, 142, 143, 144, 145, 147, 147, 147, 147, 118, 114, 111,
+        108, 104, 106, 107, 109, 110, 112, 114, 116, 119, 121, 123, 125, 127,
+        129, 131, 133, 135, 137, 138, 140, 142, 143, 144, 145, 147, 147, 147,
+        147 },
+      { /* Intra matrices */
+        /* Size 4 */
+        35, 51, 54, 64, 51, 58, 61, 67, 54, 61, 72, 79, 64, 67, 79, 87,
+        /* Size 8 */
+        37, 31, 51, 53, 56, 60, 66, 71, 31, 42, 51, 48, 49, 52, 57, 63, 51, 51,
+        58, 56, 56, 58, 62, 66, 53, 48, 56, 61, 64, 66, 68, 72, 56, 49, 56, 64,
+        69, 72, 75, 77, 60, 52, 58, 66, 72, 76, 80, 83, 66, 57, 62, 68, 75, 80,
+        84, 87, 71, 63, 66, 72, 77, 83, 87, 90,
+        /* Size 16 */
+        37, 33, 31, 38, 50, 51, 52, 53, 55, 57, 59, 62, 64, 67, 70, 70, 33, 34,
+        35, 41, 50, 50, 49, 50, 51, 53, 55, 57, 60, 63, 65, 65, 31, 35, 41, 45,
+        50, 48, 47, 47, 48, 49, 51, 54, 56, 59, 61, 61, 38, 41, 45, 49, 53, 52,
+        51, 51, 51, 53, 54, 56, 58, 61, 63, 63, 50, 50, 50, 53, 57, 56, 55, 55,
+        55, 56, 57, 59, 61, 63, 65, 65, 51, 50, 48, 52, 56, 57, 58, 58, 58, 59,
+        60, 62, 64, 66, 68, 68, 52, 49, 47, 51, 55, 58, 60, 61, 62, 63, 64, 66,
+        67, 69, 70, 70, 53, 50, 47, 51, 55, 58, 61, 63, 65, 66, 67, 69, 70, 71,
+        73, 73, 55, 51, 48, 51, 55, 58, 62, 65, 67, 69, 70, 72, 73, 75, 76, 76,
+        57, 53, 49, 53, 56, 59, 63, 66, 69, 71, 73, 74, 76, 77, 78, 78, 59, 55,
+        51, 54, 57, 60, 64, 67, 70, 73, 75, 77, 78, 80, 81, 81, 62, 57, 54, 56,
+        59, 62, 66, 69, 72, 74, 77, 78, 80, 82, 83, 83, 64, 60, 56, 58, 61, 64,
+        67, 70, 73, 76, 78, 80, 82, 84, 85, 85, 67, 63, 59, 61, 63, 66, 69, 71,
+        75, 77, 80, 82, 84, 85, 87, 87, 70, 65, 61, 63, 65, 68, 70, 73, 76, 78,
+        81, 83, 85, 87, 89, 89, 70, 65, 61, 63, 65, 68, 70, 73, 76, 78, 81, 83,
+        85, 87, 89, 89,
+        /* Size 32 */
+        36, 35, 33, 32, 30, 33, 37, 43, 49, 50, 50, 51, 51, 52, 53, 53, 54, 55,
+        56, 57, 58, 60, 61, 62, 64, 65, 66, 68, 69, 69, 69, 69, 35, 34, 33, 33,
+        32, 35, 39, 44, 49, 49, 50, 50, 50, 50, 51, 52, 52, 53, 54, 55, 56, 58,
+        59, 60, 62, 63, 64, 66, 67, 67, 67, 67, 33, 33, 34, 34, 35, 38, 41, 45,
+        49, 49, 49, 49, 49, 49, 49, 50, 50, 51, 52, 53, 54, 56, 57, 58, 59, 61,
+        62, 63, 65, 65, 65, 65, 32, 33, 34, 36, 38, 40, 43, 46, 50, 49, 48, 48,
+        47, 48, 48, 48, 49, 50, 51, 51, 52, 54, 55, 56, 57, 59, 60, 61, 63, 63,
+        63, 63, 30, 32, 35, 38, 41, 43, 45, 47, 50, 49, 48, 47, 46, 46, 47, 47,
+        47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 61, 61, 61, 61, 33, 35,
+        38, 40, 43, 45, 47, 49, 51, 50, 50, 49, 48, 48, 48, 49, 49, 50, 50, 51,
+        52, 53, 54, 55, 57, 58, 59, 60, 62, 62, 62, 62, 37, 39, 41, 43, 45, 47,
+        49, 51, 53, 52, 51, 51, 50, 50, 50, 50, 51, 51, 52, 53, 54, 54, 56, 57,
+        58, 59, 60, 61, 63, 63, 63, 63, 43, 44, 45, 46, 47, 49, 51, 52, 54, 54,
+        53, 53, 52, 52, 52, 52, 52, 53, 54, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+        63, 63, 63, 63, 49, 49, 49, 50, 50, 51, 53, 54, 56, 56, 55, 55, 55, 55,
+        55, 55, 54, 55, 56, 56, 57, 57, 58, 59, 60, 61, 62, 63, 64, 64, 64, 64,
+        50, 49, 49, 49, 49, 50, 52, 54, 56, 56, 56, 56, 56, 56, 56, 56, 56, 57,
+        57, 58, 58, 59, 60, 61, 62, 63, 64, 65, 66, 66, 66, 66, 50, 50, 49, 48,
+        48, 50, 51, 53, 55, 56, 56, 57, 57, 57, 57, 58, 58, 58, 59, 59, 60, 61,
+        61, 62, 63, 64, 65, 66, 67, 67, 67, 67, 51, 50, 49, 48, 47, 49, 51, 53,
+        55, 56, 57, 57, 58, 59, 59, 59, 60, 60, 61, 61, 62, 62, 63, 64, 65, 65,
+        66, 67, 68, 68, 68, 68, 51, 50, 49, 47, 46, 48, 50, 52, 55, 56, 57, 58,
+        60, 60, 61, 61, 62, 62, 63, 63, 64, 64, 65, 66, 66, 67, 68, 69, 70, 70,
+        70, 70, 52, 50, 49, 48, 46, 48, 50, 52, 55, 56, 57, 59, 60, 61, 62, 62,
+        63, 63, 64, 65, 65, 66, 66, 67, 68, 68, 69, 70, 71, 71, 71, 71, 53, 51,
+        49, 48, 47, 48, 50, 52, 55, 56, 57, 59, 61, 62, 62, 63, 64, 65, 65, 66,
+        67, 67, 68, 69, 69, 70, 71, 71, 72, 72, 72, 72, 53, 52, 50, 48, 47, 49,
+        50, 52, 55, 56, 58, 59, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69, 69, 70,
+        71, 71, 72, 73, 74, 74, 74, 74, 54, 52, 50, 49, 47, 49, 51, 52, 54, 56,
+        58, 60, 62, 63, 64, 65, 67, 67, 68, 69, 70, 70, 71, 72, 72, 73, 74, 74,
+        75, 75, 75, 75, 55, 53, 51, 50, 48, 50, 51, 53, 55, 57, 58, 60, 62, 63,
+        65, 66, 67, 68, 69, 70, 71, 71, 72, 73, 74, 74, 75, 76, 76, 76, 76, 76,
+        56, 54, 52, 51, 49, 50, 52, 54, 56, 57, 59, 61, 63, 64, 65, 67, 68, 69,
+        70, 71, 72, 73, 73, 74, 75, 76, 76, 77, 78, 78, 78, 78, 57, 55, 53, 51,
+        50, 51, 53, 54, 56, 58, 59, 61, 63, 65, 66, 67, 69, 70, 71, 72, 73, 74,
+        75, 75, 76, 77, 78, 78, 79, 79, 79, 79, 58, 56, 54, 52, 51, 52, 54, 55,
+        57, 58, 60, 62, 64, 65, 67, 68, 70, 71, 72, 73, 74, 75, 76, 77, 77, 78,
+        79, 80, 80, 80, 80, 80, 60, 58, 56, 54, 52, 53, 54, 56, 57, 59, 61, 62,
+        64, 66, 67, 69, 70, 71, 73, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 81,
+        81, 81, 61, 59, 57, 55, 53, 54, 56, 57, 58, 60, 61, 63, 65, 66, 68, 69,
+        71, 72, 73, 75, 76, 77, 78, 78, 79, 80, 81, 82, 82, 82, 82, 82, 62, 60,
+        58, 56, 54, 55, 57, 58, 59, 61, 62, 64, 66, 67, 69, 70, 72, 73, 74, 75,
+        77, 78, 78, 79, 80, 81, 82, 83, 83, 83, 83, 83, 64, 62, 59, 57, 55, 57,
+        58, 59, 60, 62, 63, 65, 66, 68, 69, 71, 72, 74, 75, 76, 77, 78, 79, 80,
+        81, 82, 83, 84, 84, 84, 84, 84, 65, 63, 61, 59, 57, 58, 59, 60, 61, 63,
+        64, 65, 67, 68, 70, 71, 73, 74, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84,
+        85, 85, 85, 85, 66, 64, 62, 60, 58, 59, 60, 61, 62, 64, 65, 66, 68, 69,
+        71, 72, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 84, 85, 86, 86, 86, 86,
+        68, 66, 63, 61, 59, 60, 61, 62, 63, 65, 66, 67, 69, 70, 71, 73, 74, 76,
+        77, 78, 80, 81, 82, 83, 84, 84, 85, 86, 87, 87, 87, 87, 69, 67, 65, 63,
+        61, 62, 63, 63, 64, 66, 67, 68, 70, 71, 72, 74, 75, 76, 78, 79, 80, 81,
+        82, 83, 84, 85, 86, 87, 88, 88, 88, 88, 69, 67, 65, 63, 61, 62, 63, 63,
+        64, 66, 67, 68, 70, 71, 72, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 85,
+        86, 87, 88, 88, 88, 88, 69, 67, 65, 63, 61, 62, 63, 63, 64, 66, 67, 68,
+        70, 71, 72, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 88,
+        88, 88, 69, 67, 65, 63, 61, 62, 63, 63, 64, 66, 67, 68, 70, 71, 72, 74,
+        75, 76, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 88, 88, 88 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 68, 96, 122, 68, 90, 110, 128, 96, 110, 130, 142, 122, 128, 142,
+        150,
+        /* Size 8 */
+        64, 52, 55, 67, 81, 95, 107, 116, 52, 58, 56, 63, 74, 86, 98, 108, 55,
+        56, 71, 78, 86, 95, 104, 113, 67, 63, 78, 91, 99, 106, 112, 118, 81, 74,
+        86, 99, 108, 114, 119, 124, 95, 86, 95, 106, 114, 120, 125, 128, 107,
+        98, 104, 112, 119, 125, 129, 132, 116, 108, 113, 118, 124, 128, 132,
+        134,
+        /* Size 16 */
+        64, 57, 52, 53, 55, 61, 67, 74, 81, 88, 95, 100, 107, 111, 116, 116, 57,
+        56, 55, 55, 55, 60, 65, 71, 77, 84, 91, 96, 102, 107, 112, 112, 52, 55,
+        58, 57, 56, 59, 63, 68, 74, 80, 86, 92, 98, 103, 108, 108, 53, 55, 57,
+        59, 62, 66, 70, 74, 80, 85, 91, 96, 101, 106, 110, 110, 55, 55, 56, 62,
+        71, 74, 78, 82, 86, 91, 95, 100, 104, 108, 113, 113, 61, 60, 59, 66, 74,
+        79, 84, 88, 92, 96, 100, 104, 108, 112, 115, 115, 67, 65, 63, 70, 78,
+        84, 91, 94, 99, 102, 106, 109, 112, 115, 118, 118, 74, 71, 68, 74, 82,
+        88, 94, 98, 103, 106, 110, 112, 116, 118, 121, 121, 81, 77, 74, 80, 86,
+        92, 99, 103, 108, 111, 114, 117, 119, 121, 124, 124, 88, 84, 80, 85, 91,
+        96, 102, 106, 111, 114, 117, 119, 122, 124, 126, 126, 95, 91, 86, 91,
+        95, 100, 106, 110, 114, 117, 120, 122, 125, 126, 128, 128, 100, 96, 92,
+        96, 100, 104, 109, 112, 117, 119, 122, 124, 127, 128, 130, 130, 107,
+        102, 98, 101, 104, 108, 112, 116, 119, 122, 125, 127, 129, 130, 132,
+        132, 111, 107, 103, 106, 108, 112, 115, 118, 121, 124, 126, 128, 130,
+        131, 133, 133, 116, 112, 108, 110, 113, 115, 118, 121, 124, 126, 128,
+        130, 132, 133, 134, 134, 116, 112, 108, 110, 113, 115, 118, 121, 124,
+        126, 128, 130, 132, 133, 134, 134,
+        /* Size 32 */
+        64, 60, 57, 55, 52, 53, 53, 54, 55, 58, 61, 64, 67, 70, 74, 77, 81, 84,
+        88, 91, 95, 98, 100, 103, 107, 109, 111, 113, 116, 116, 116, 116, 60,
+        58, 57, 55, 53, 54, 54, 55, 55, 58, 60, 63, 66, 69, 72, 76, 79, 82, 86,
+        89, 93, 95, 98, 101, 104, 107, 109, 111, 114, 114, 114, 114, 57, 57, 56,
+        55, 55, 55, 55, 55, 55, 57, 60, 62, 65, 68, 71, 74, 77, 80, 84, 87, 91,
+        93, 96, 99, 102, 105, 107, 109, 112, 112, 112, 112, 55, 55, 55, 56, 56,
+        56, 56, 56, 56, 57, 59, 61, 64, 66, 69, 72, 76, 79, 82, 85, 88, 91, 94,
+        97, 100, 103, 105, 108, 110, 110, 110, 110, 52, 53, 55, 56, 58, 57, 57,
+        56, 56, 57, 59, 61, 63, 65, 68, 71, 74, 77, 80, 83, 86, 89, 92, 95, 98,
+        101, 103, 106, 108, 108, 108, 108, 53, 54, 55, 56, 57, 58, 58, 58, 59,
+        60, 62, 64, 66, 68, 71, 74, 77, 79, 82, 85, 89, 91, 94, 97, 100, 102,
+        104, 107, 109, 109, 109, 109, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64,
+        66, 68, 70, 72, 74, 77, 80, 82, 85, 88, 91, 93, 96, 98, 101, 103, 106,
+        108, 110, 110, 110, 110, 54, 55, 55, 56, 56, 58, 61, 63, 66, 68, 70, 72,
+        74, 76, 78, 80, 83, 85, 88, 90, 93, 95, 98, 100, 103, 105, 107, 109,
+        112, 112, 112, 112, 55, 55, 55, 56, 56, 59, 62, 66, 71, 73, 74, 76, 78,
+        80, 82, 84, 86, 88, 91, 93, 95, 97, 100, 102, 104, 106, 108, 110, 113,
+        113, 113, 113, 58, 58, 57, 57, 57, 60, 64, 68, 73, 74, 77, 79, 81, 83,
+        85, 87, 89, 91, 93, 95, 98, 100, 102, 104, 106, 108, 110, 112, 114, 114,
+        114, 114, 61, 60, 60, 59, 59, 62, 66, 70, 74, 77, 79, 81, 84, 86, 88,
+        90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 113, 115, 115,
+        115, 115, 64, 63, 62, 61, 61, 64, 68, 72, 76, 79, 81, 84, 87, 89, 91,
+        93, 95, 97, 99, 101, 103, 105, 106, 108, 110, 112, 113, 115, 117, 117,
+        117, 117, 67, 66, 65, 64, 63, 66, 70, 74, 78, 81, 84, 87, 91, 92, 94,
+        96, 99, 100, 102, 104, 106, 107, 109, 110, 112, 114, 115, 117, 118, 118,
+        118, 118, 70, 69, 68, 66, 65, 68, 72, 76, 80, 83, 86, 89, 92, 94, 96,
+        99, 101, 102, 104, 106, 108, 109, 111, 112, 114, 115, 117, 118, 119,
+        119, 119, 119, 74, 72, 71, 69, 68, 71, 74, 78, 82, 85, 88, 91, 94, 96,
+        98, 101, 103, 105, 106, 108, 110, 111, 112, 114, 116, 117, 118, 119,
+        121, 121, 121, 121, 77, 76, 74, 72, 71, 74, 77, 80, 84, 87, 90, 93, 96,
+        99, 101, 103, 105, 107, 108, 110, 112, 113, 114, 116, 117, 118, 120,
+        121, 122, 122, 122, 122, 81, 79, 77, 76, 74, 77, 80, 83, 86, 89, 92, 95,
+        99, 101, 103, 105, 108, 109, 111, 112, 114, 115, 117, 118, 119, 120,
+        121, 122, 124, 124, 124, 124, 84, 82, 80, 79, 77, 79, 82, 85, 88, 91,
+        94, 97, 100, 102, 105, 107, 109, 111, 112, 114, 115, 117, 118, 119, 120,
+        121, 123, 124, 125, 125, 125, 125, 88, 86, 84, 82, 80, 82, 85, 88, 91,
+        93, 96, 99, 102, 104, 106, 108, 111, 112, 114, 115, 117, 118, 119, 121,
+        122, 123, 124, 125, 126, 126, 126, 126, 91, 89, 87, 85, 83, 85, 88, 90,
+        93, 95, 98, 101, 104, 106, 108, 110, 112, 114, 115, 117, 119, 120, 121,
+        122, 123, 124, 125, 126, 127, 127, 127, 127, 95, 93, 91, 88, 86, 89, 91,
+        93, 95, 98, 100, 103, 106, 108, 110, 112, 114, 115, 117, 119, 120, 121,
+        122, 123, 125, 125, 126, 127, 128, 128, 128, 128, 98, 95, 93, 91, 89,
+        91, 93, 95, 97, 100, 102, 105, 107, 109, 111, 113, 115, 117, 118, 120,
+        121, 122, 123, 124, 126, 126, 127, 128, 129, 129, 129, 129, 100, 98, 96,
+        94, 92, 94, 96, 98, 100, 102, 104, 106, 109, 111, 112, 114, 117, 118,
+        119, 121, 122, 123, 124, 125, 127, 127, 128, 129, 130, 130, 130, 130,
+        103, 101, 99, 97, 95, 97, 98, 100, 102, 104, 106, 108, 110, 112, 114,
+        116, 118, 119, 121, 122, 123, 124, 125, 127, 128, 128, 129, 130, 131,
+        131, 131, 131, 107, 104, 102, 100, 98, 100, 101, 103, 104, 106, 108,
+        110, 112, 114, 116, 117, 119, 120, 122, 123, 125, 126, 127, 128, 129,
+        129, 130, 131, 132, 132, 132, 132, 109, 107, 105, 103, 101, 102, 103,
+        105, 106, 108, 110, 112, 114, 115, 117, 118, 120, 121, 123, 124, 125,
+        126, 127, 128, 129, 130, 131, 131, 132, 132, 132, 132, 111, 109, 107,
+        105, 103, 104, 106, 107, 108, 110, 112, 113, 115, 117, 118, 120, 121,
+        123, 124, 125, 126, 127, 128, 129, 130, 131, 131, 132, 133, 133, 133,
+        133, 113, 111, 109, 108, 106, 107, 108, 109, 110, 112, 113, 115, 117,
+        118, 119, 121, 122, 124, 125, 126, 127, 128, 129, 130, 131, 131, 132,
+        133, 134, 134, 134, 134, 116, 114, 112, 110, 108, 109, 110, 112, 113,
+        114, 115, 117, 118, 119, 121, 122, 124, 125, 126, 127, 128, 129, 130,
+        131, 132, 132, 133, 134, 134, 134, 134, 134, 116, 114, 112, 110, 108,
+        109, 110, 112, 113, 114, 115, 117, 118, 119, 121, 122, 124, 125, 126,
+        127, 128, 129, 130, 131, 132, 132, 133, 134, 134, 134, 134, 134, 116,
+        114, 112, 110, 108, 109, 110, 112, 113, 114, 115, 117, 118, 119, 121,
+        122, 124, 125, 126, 127, 128, 129, 130, 131, 132, 132, 133, 134, 134,
+        134, 134, 134, 116, 114, 112, 110, 108, 109, 110, 112, 113, 114, 115,
+        117, 118, 119, 121, 122, 124, 125, 126, 127, 128, 129, 130, 131, 132,
+        132, 133, 134, 134, 134, 134, 134 },
+      { /* Intra matrices */
+        /* Size 4 */
+        35, 37, 53, 69, 37, 50, 61, 72, 53, 61, 74, 81, 69, 72, 81, 86,
+        /* Size 8 */
+        40, 32, 34, 42, 51, 60, 68, 75, 32, 36, 34, 39, 46, 55, 63, 70, 34, 34,
+        44, 49, 54, 61, 67, 73, 42, 39, 49, 57, 63, 68, 72, 76, 51, 46, 54, 63,
+        69, 74, 77, 80, 60, 55, 61, 68, 74, 78, 81, 84, 68, 63, 67, 72, 77, 81,
+        84, 86, 75, 70, 73, 76, 80, 84, 86, 88,
+        /* Size 16 */
+        39, 35, 31, 32, 33, 37, 41, 45, 50, 54, 59, 63, 67, 70, 73, 73, 35, 34,
+        33, 33, 33, 36, 39, 43, 47, 51, 56, 60, 64, 67, 70, 70, 31, 33, 35, 34,
+        34, 36, 38, 41, 45, 49, 53, 57, 61, 64, 68, 68, 32, 33, 34, 36, 38, 40,
+        42, 45, 49, 52, 56, 59, 63, 66, 69, 69, 33, 33, 34, 38, 43, 45, 48, 50,
+        53, 56, 59, 62, 65, 68, 71, 71, 37, 36, 36, 40, 45, 48, 52, 54, 57, 60,
+        62, 65, 68, 70, 73, 73, 41, 39, 38, 42, 48, 52, 56, 59, 61, 64, 66, 68,
+        71, 73, 75, 75, 45, 43, 41, 45, 50, 54, 59, 61, 64, 66, 69, 71, 73, 75,
+        77, 77, 50, 47, 45, 49, 53, 57, 61, 64, 67, 70, 72, 74, 75, 77, 78, 78,
+        54, 51, 49, 52, 56, 60, 64, 66, 70, 72, 74, 76, 77, 79, 80, 80, 59, 56,
+        53, 56, 59, 62, 66, 69, 72, 74, 76, 78, 79, 80, 82, 82, 63, 60, 57, 59,
+        62, 65, 68, 71, 74, 76, 78, 79, 81, 82, 83, 83, 67, 64, 61, 63, 65, 68,
+        71, 73, 75, 77, 79, 81, 82, 83, 84, 84, 70, 67, 64, 66, 68, 70, 73, 75,
+        77, 79, 80, 82, 83, 84, 85, 85, 73, 70, 68, 69, 71, 73, 75, 77, 78, 80,
+        82, 83, 84, 85, 86, 86, 73, 70, 68, 69, 71, 73, 75, 77, 78, 80, 82, 83,
+        84, 85, 86, 86,
+        /* Size 32 */
+        38, 36, 34, 32, 31, 31, 32, 32, 33, 34, 36, 38, 40, 42, 44, 47, 49, 51,
+        53, 56, 58, 60, 62, 64, 66, 67, 69, 71, 72, 72, 72, 72, 36, 35, 34, 33,
+        32, 32, 32, 33, 33, 34, 36, 38, 40, 41, 43, 46, 48, 50, 52, 54, 57, 59,
+        60, 62, 64, 66, 68, 69, 71, 71, 71, 71, 34, 34, 33, 33, 32, 33, 33, 33,
+        33, 34, 36, 37, 39, 41, 42, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65,
+        66, 68, 70, 70, 70, 70, 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 35, 37,
+        38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 63, 65, 67, 68, 68,
+        68, 68, 31, 32, 32, 33, 34, 34, 34, 33, 33, 34, 35, 36, 37, 39, 41, 43,
+        45, 46, 48, 50, 53, 54, 56, 58, 60, 62, 64, 65, 67, 67, 67, 67, 31, 32,
+        33, 33, 34, 34, 35, 35, 35, 36, 37, 38, 40, 41, 43, 44, 46, 48, 50, 52,
+        54, 56, 57, 59, 61, 63, 64, 66, 68, 68, 68, 68, 32, 32, 33, 33, 34, 35,
+        35, 36, 37, 38, 39, 41, 42, 43, 45, 46, 48, 50, 52, 53, 55, 57, 59, 61,
+        62, 64, 65, 67, 69, 69, 69, 69, 32, 33, 33, 33, 33, 35, 36, 38, 40, 41,
+        42, 43, 44, 46, 47, 49, 50, 52, 53, 55, 57, 58, 60, 62, 63, 65, 66, 68,
+        69, 69, 69, 69, 33, 33, 33, 33, 33, 35, 37, 40, 43, 44, 45, 46, 47, 49,
+        50, 51, 53, 54, 55, 57, 58, 60, 61, 63, 65, 66, 67, 69, 70, 70, 70, 70,
+        34, 34, 34, 34, 34, 36, 38, 41, 44, 45, 46, 48, 49, 50, 52, 53, 54, 56,
+        57, 59, 60, 61, 63, 64, 66, 67, 68, 70, 71, 71, 71, 71, 36, 36, 36, 35,
+        35, 37, 39, 42, 45, 46, 48, 49, 51, 52, 54, 55, 56, 58, 59, 60, 62, 63,
+        64, 66, 67, 68, 69, 71, 72, 72, 72, 72, 38, 38, 37, 37, 36, 38, 41, 43,
+        46, 48, 49, 51, 53, 54, 56, 57, 58, 60, 61, 62, 63, 65, 66, 67, 68, 69,
+        71, 72, 73, 73, 73, 73, 40, 40, 39, 38, 37, 40, 42, 44, 47, 49, 51, 53,
+        55, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74, 74,
+        74, 74, 42, 41, 41, 40, 39, 41, 43, 46, 49, 50, 52, 54, 57, 58, 59, 61,
+        62, 63, 64, 65, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 75, 75, 44, 43,
+        42, 42, 41, 43, 45, 47, 50, 52, 54, 56, 58, 59, 61, 62, 64, 65, 66, 67,
+        68, 69, 70, 71, 72, 73, 74, 75, 76, 76, 76, 76, 47, 46, 45, 44, 43, 44,
+        46, 49, 51, 53, 55, 57, 59, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72,
+        73, 74, 75, 76, 77, 77, 77, 77, 49, 48, 47, 46, 45, 46, 48, 50, 53, 54,
+        56, 58, 61, 62, 64, 65, 67, 68, 69, 70, 71, 72, 73, 74, 74, 75, 76, 77,
+        78, 78, 78, 78, 51, 50, 49, 48, 46, 48, 50, 52, 54, 56, 58, 60, 62, 63,
+        65, 66, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 78, 78, 78, 78,
+        53, 52, 51, 50, 48, 50, 52, 53, 55, 57, 59, 61, 63, 64, 66, 67, 69, 70,
+        71, 72, 73, 74, 75, 75, 76, 77, 78, 78, 79, 79, 79, 79, 56, 54, 53, 52,
+        50, 52, 53, 55, 57, 59, 60, 62, 64, 65, 67, 68, 70, 71, 72, 73, 74, 75,
+        76, 76, 77, 78, 79, 79, 80, 80, 80, 80, 58, 57, 55, 54, 53, 54, 55, 57,
+        58, 60, 62, 63, 65, 67, 68, 69, 71, 72, 73, 74, 75, 76, 77, 77, 78, 79,
+        79, 80, 81, 81, 81, 81, 60, 59, 57, 56, 54, 56, 57, 58, 60, 61, 63, 65,
+        66, 68, 69, 70, 72, 73, 74, 75, 76, 77, 77, 78, 79, 79, 80, 81, 81, 81,
+        81, 81, 62, 60, 59, 58, 56, 57, 59, 60, 61, 63, 64, 66, 67, 69, 70, 71,
+        73, 74, 75, 76, 77, 77, 78, 79, 80, 80, 81, 81, 82, 82, 82, 82, 64, 62,
+        61, 60, 58, 59, 61, 62, 63, 64, 66, 67, 69, 70, 71, 72, 74, 75, 75, 76,
+        77, 78, 79, 80, 80, 81, 81, 82, 82, 82, 82, 82, 66, 64, 63, 62, 60, 61,
+        62, 63, 65, 66, 67, 68, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 80,
+        81, 82, 82, 83, 83, 83, 83, 83, 67, 66, 65, 63, 62, 63, 64, 65, 66, 67,
+        68, 69, 71, 72, 73, 74, 75, 76, 77, 78, 79, 79, 80, 81, 82, 82, 83, 83,
+        84, 84, 84, 84, 69, 68, 66, 65, 64, 64, 65, 66, 67, 68, 69, 71, 72, 73,
+        74, 75, 76, 77, 78, 79, 79, 80, 81, 81, 82, 83, 83, 84, 84, 84, 84, 84,
+        71, 69, 68, 67, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78,
+        78, 79, 80, 81, 81, 82, 83, 83, 84, 84, 84, 84, 84, 84, 72, 71, 70, 68,
+        67, 68, 69, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81,
+        82, 82, 83, 84, 84, 84, 85, 85, 85, 85, 72, 71, 70, 68, 67, 68, 69, 69,
+        70, 71, 72, 73, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 82, 82, 83, 84,
+        84, 84, 85, 85, 85, 85, 72, 71, 70, 68, 67, 68, 69, 69, 70, 71, 72, 73,
+        74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 82, 82, 83, 84, 84, 84, 85, 85,
+        85, 85, 72, 71, 70, 68, 67, 68, 69, 69, 70, 71, 72, 73, 74, 75, 76, 77,
+        78, 78, 79, 80, 81, 81, 82, 82, 83, 84, 84, 84, 85, 85, 85, 85 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 88, 93, 107, 88, 99, 103, 112, 93, 103, 119, 127, 107, 112, 127,
+        137,
+        /* Size 8 */
+        64, 54, 83, 87, 90, 97, 104, 111, 54, 71, 84, 79, 81, 86, 92, 100, 83,
+        84, 93, 91, 91, 94, 99, 104, 87, 79, 91, 98, 101, 103, 107, 111, 90, 81,
+        91, 101, 107, 111, 115, 118, 97, 86, 94, 103, 111, 117, 121, 124, 104,
+        92, 99, 107, 115, 121, 126, 129, 111, 100, 104, 111, 118, 124, 129, 133,
+        /* Size 16 */
+        64, 59, 54, 66, 83, 85, 87, 88, 90, 93, 97, 100, 104, 107, 111, 111, 59,
+        60, 62, 71, 84, 83, 83, 84, 85, 88, 91, 94, 98, 101, 105, 105, 54, 62,
+        71, 77, 84, 82, 79, 80, 81, 83, 86, 89, 92, 96, 100, 100, 66, 71, 77,
+        83, 88, 87, 85, 85, 85, 87, 90, 92, 95, 99, 102, 102, 83, 84, 84, 88,
+        93, 92, 91, 91, 91, 92, 94, 96, 99, 101, 104, 104, 85, 83, 82, 87, 92,
+        93, 95, 95, 96, 97, 98, 100, 103, 105, 108, 108, 87, 83, 79, 85, 91, 95,
+        98, 100, 101, 102, 103, 105, 107, 109, 111, 111, 88, 84, 80, 85, 91, 95,
+        100, 102, 104, 106, 107, 109, 111, 113, 115, 115, 90, 85, 81, 85, 91,
+        96, 101, 104, 107, 109, 111, 113, 115, 116, 118, 118, 93, 88, 83, 87,
+        92, 97, 102, 106, 109, 112, 114, 116, 118, 120, 121, 121, 97, 91, 86,
+        90, 94, 98, 103, 107, 111, 114, 117, 119, 121, 123, 124, 124, 100, 94,
+        89, 92, 96, 100, 105, 109, 113, 116, 119, 121, 123, 125, 127, 127, 104,
+        98, 92, 95, 99, 103, 107, 111, 115, 118, 121, 123, 126, 128, 129, 129,
+        107, 101, 96, 99, 101, 105, 109, 113, 116, 120, 123, 125, 128, 129, 131,
+        131, 111, 105, 100, 102, 104, 108, 111, 115, 118, 121, 124, 127, 129,
+        131, 133, 133, 111, 105, 100, 102, 104, 108, 111, 115, 118, 121, 124,
+        127, 129, 131, 133, 133,
+        /* Size 32 */
+        64, 61, 59, 57, 54, 60, 66, 74, 83, 84, 85, 86, 87, 87, 88, 89, 90, 92,
+        93, 95, 97, 98, 100, 102, 104, 105, 107, 109, 111, 111, 111, 111, 61,
+        60, 60, 59, 58, 63, 68, 75, 84, 84, 84, 84, 85, 85, 86, 87, 88, 89, 90,
+        92, 94, 95, 97, 99, 101, 102, 104, 106, 108, 108, 108, 108, 59, 60, 60,
+        61, 62, 66, 71, 77, 84, 84, 83, 83, 83, 83, 84, 84, 85, 86, 88, 89, 91,
+        92, 94, 96, 98, 99, 101, 103, 105, 105, 105, 105, 57, 59, 61, 64, 66,
+        70, 74, 79, 84, 83, 82, 82, 81, 81, 82, 82, 83, 84, 85, 87, 88, 90, 91,
+        93, 95, 97, 98, 100, 102, 102, 102, 102, 54, 58, 62, 66, 71, 74, 77, 81,
+        84, 83, 82, 80, 79, 79, 80, 80, 81, 82, 83, 84, 86, 87, 89, 91, 92, 94,
+        96, 98, 100, 100, 100, 100, 60, 63, 66, 70, 74, 77, 80, 83, 86, 85, 84,
+        83, 82, 82, 82, 83, 83, 84, 85, 86, 88, 89, 91, 92, 94, 95, 97, 99, 101,
+        101, 101, 101, 66, 68, 71, 74, 77, 80, 83, 85, 88, 88, 87, 86, 85, 85,
+        85, 85, 85, 86, 87, 89, 90, 91, 92, 94, 95, 97, 99, 100, 102, 102, 102,
+        102, 74, 75, 77, 79, 81, 83, 85, 88, 91, 90, 89, 89, 88, 88, 88, 88, 88,
+        89, 90, 91, 92, 93, 94, 96, 97, 98, 100, 102, 103, 103, 103, 103, 83,
+        84, 84, 84, 84, 86, 88, 91, 93, 93, 92, 92, 91, 91, 91, 91, 91, 92, 92,
+        93, 94, 95, 96, 97, 99, 100, 101, 103, 104, 104, 104, 104, 84, 84, 84,
+        83, 83, 85, 88, 90, 93, 93, 93, 93, 93, 93, 93, 93, 93, 94, 95, 95, 96,
+        97, 98, 99, 101, 102, 103, 105, 106, 106, 106, 106, 85, 84, 83, 82, 82,
+        84, 87, 89, 92, 93, 93, 94, 95, 95, 95, 95, 96, 96, 97, 98, 98, 99, 100,
+        102, 103, 104, 105, 106, 108, 108, 108, 108, 86, 84, 83, 82, 80, 83, 86,
+        89, 92, 93, 94, 95, 96, 97, 97, 98, 98, 99, 100, 100, 101, 102, 103,
+        104, 105, 106, 107, 108, 109, 109, 109, 109, 87, 85, 83, 81, 79, 82, 85,
+        88, 91, 93, 95, 96, 98, 99, 100, 100, 101, 102, 102, 103, 103, 104, 105,
+        106, 107, 108, 109, 110, 111, 111, 111, 111, 87, 85, 83, 81, 79, 82, 85,
+        88, 91, 93, 95, 97, 99, 100, 101, 102, 102, 103, 104, 105, 105, 106,
+        107, 108, 109, 110, 111, 112, 113, 113, 113, 113, 88, 86, 84, 82, 80,
+        82, 85, 88, 91, 93, 95, 97, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+        108, 109, 110, 111, 112, 113, 114, 115, 115, 115, 115, 89, 87, 84, 82,
+        80, 83, 85, 88, 91, 93, 95, 98, 100, 102, 103, 104, 106, 107, 107, 108,
+        109, 110, 111, 112, 113, 114, 115, 115, 116, 116, 116, 116, 90, 88, 85,
+        83, 81, 83, 85, 88, 91, 93, 96, 98, 101, 102, 104, 106, 107, 108, 109,
+        110, 111, 112, 113, 114, 115, 116, 116, 117, 118, 118, 118, 118, 92, 89,
+        86, 84, 82, 84, 86, 89, 92, 94, 96, 99, 102, 103, 105, 107, 108, 109,
+        111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 120, 120, 120,
+        93, 90, 88, 85, 83, 85, 87, 90, 92, 95, 97, 100, 102, 104, 106, 107,
+        109, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 120, 121, 121,
+        121, 121, 95, 92, 89, 87, 84, 86, 89, 91, 93, 95, 98, 100, 103, 105,
+        106, 108, 110, 112, 113, 114, 116, 116, 117, 118, 119, 120, 121, 122,
+        123, 123, 123, 123, 97, 94, 91, 88, 86, 88, 90, 92, 94, 96, 98, 101,
+        103, 105, 107, 109, 111, 113, 114, 116, 117, 118, 119, 120, 121, 122,
+        123, 124, 124, 124, 124, 124, 98, 95, 92, 90, 87, 89, 91, 93, 95, 97,
+        99, 102, 104, 106, 108, 110, 112, 114, 115, 116, 118, 119, 120, 121,
+        122, 123, 124, 125, 126, 126, 126, 126, 100, 97, 94, 91, 89, 91, 92, 94,
+        96, 98, 100, 103, 105, 107, 109, 111, 113, 114, 116, 117, 119, 120, 121,
+        122, 123, 124, 125, 126, 127, 127, 127, 127, 102, 99, 96, 93, 91, 92,
+        94, 96, 97, 99, 102, 104, 106, 108, 110, 112, 114, 115, 117, 118, 120,
+        121, 122, 123, 125, 125, 126, 127, 128, 128, 128, 128, 104, 101, 98, 95,
+        92, 94, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115, 116, 118,
+        119, 121, 122, 123, 125, 126, 127, 128, 128, 129, 129, 129, 129, 105,
+        102, 99, 97, 94, 95, 97, 98, 100, 102, 104, 106, 108, 110, 112, 114,
+        116, 117, 119, 120, 122, 123, 124, 125, 127, 128, 128, 129, 130, 130,
+        130, 130, 107, 104, 101, 98, 96, 97, 99, 100, 101, 103, 105, 107, 109,
+        111, 113, 115, 116, 118, 120, 121, 123, 124, 125, 126, 128, 128, 129,
+        130, 131, 131, 131, 131, 109, 106, 103, 100, 98, 99, 100, 102, 103, 105,
+        106, 108, 110, 112, 114, 115, 117, 119, 120, 122, 124, 125, 126, 127,
+        128, 129, 130, 131, 132, 132, 132, 132, 111, 108, 105, 102, 100, 101,
+        102, 103, 104, 106, 108, 109, 111, 113, 115, 116, 118, 120, 121, 123,
+        124, 126, 127, 128, 129, 130, 131, 132, 133, 133, 133, 133, 111, 108,
+        105, 102, 100, 101, 102, 103, 104, 106, 108, 109, 111, 113, 115, 116,
+        118, 120, 121, 123, 124, 126, 127, 128, 129, 130, 131, 132, 133, 133,
+        133, 133, 111, 108, 105, 102, 100, 101, 102, 103, 104, 106, 108, 109,
+        111, 113, 115, 116, 118, 120, 121, 123, 124, 126, 127, 128, 129, 130,
+        131, 132, 133, 133, 133, 133, 111, 108, 105, 102, 100, 101, 102, 103,
+        104, 106, 108, 109, 111, 113, 115, 116, 118, 120, 121, 123, 124, 126,
+        127, 128, 129, 130, 131, 132, 133, 133, 133, 133 },
+      { /* Intra matrices */
+        /* Size 4 */
+        37, 52, 55, 64, 52, 59, 62, 67, 55, 62, 72, 77, 64, 67, 77, 84,
+        /* Size 8 */
+        40, 33, 52, 54, 57, 61, 66, 71, 33, 44, 53, 49, 50, 54, 58, 63, 52, 53,
+        59, 57, 57, 59, 63, 66, 54, 49, 57, 62, 64, 66, 68, 71, 57, 50, 57, 64,
+        68, 71, 74, 76, 61, 54, 59, 66, 71, 75, 78, 80, 66, 58, 63, 68, 74, 78,
+        81, 84, 71, 63, 66, 71, 76, 80, 84, 86,
+        /* Size 16 */
+        39, 36, 33, 40, 51, 52, 53, 55, 56, 58, 60, 62, 65, 67, 70, 70, 36, 37,
+        38, 43, 52, 51, 51, 52, 53, 54, 56, 58, 61, 63, 66, 66, 33, 38, 44, 47,
+        52, 50, 49, 49, 50, 51, 53, 55, 57, 60, 62, 62, 40, 43, 47, 51, 55, 53,
+        52, 52, 53, 54, 55, 57, 59, 61, 64, 64, 51, 52, 52, 55, 58, 57, 56, 56,
+        56, 57, 58, 60, 61, 63, 65, 65, 52, 51, 50, 53, 57, 58, 59, 59, 59, 60,
+        61, 63, 64, 66, 68, 68, 53, 51, 49, 52, 56, 59, 61, 62, 63, 64, 65, 66,
+        67, 68, 70, 70, 55, 52, 49, 52, 56, 59, 62, 64, 65, 66, 67, 68, 70, 71,
+        72, 72, 56, 53, 50, 53, 56, 59, 63, 65, 67, 69, 70, 71, 72, 73, 75, 75,
+        58, 54, 51, 54, 57, 60, 64, 66, 69, 70, 72, 73, 74, 76, 77, 77, 60, 56,
+        53, 55, 58, 61, 65, 67, 70, 72, 74, 75, 77, 78, 79, 79, 62, 58, 55, 57,
+        60, 63, 66, 68, 71, 73, 75, 77, 78, 79, 81, 81, 65, 61, 57, 59, 61, 64,
+        67, 70, 72, 74, 77, 78, 80, 81, 82, 82, 67, 63, 60, 61, 63, 66, 68, 71,
+        73, 76, 78, 79, 81, 82, 84, 84, 70, 66, 62, 64, 65, 68, 70, 72, 75, 77,
+        79, 81, 82, 84, 85, 85, 70, 66, 62, 64, 65, 68, 70, 72, 75, 77, 79, 81,
+        82, 84, 85, 85,
+        /* Size 32 */
+        39, 37, 35, 34, 33, 36, 40, 45, 51, 51, 52, 52, 53, 54, 54, 55, 55, 56,
+        57, 58, 59, 61, 62, 63, 64, 65, 67, 68, 69, 69, 69, 69, 37, 36, 36, 35,
+        35, 38, 41, 46, 51, 51, 51, 52, 52, 52, 53, 53, 54, 55, 56, 57, 58, 59,
+        60, 61, 62, 63, 64, 66, 67, 67, 67, 67, 35, 36, 36, 37, 37, 40, 43, 47,
+        51, 51, 51, 51, 50, 51, 51, 52, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+        63, 64, 65, 65, 65, 65, 34, 35, 37, 38, 40, 42, 45, 48, 51, 51, 50, 50,
+        49, 50, 50, 50, 51, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, 62, 63, 63,
+        63, 63, 33, 35, 37, 40, 43, 45, 47, 49, 52, 51, 50, 49, 48, 48, 49, 49,
+        49, 50, 51, 52, 52, 53, 55, 56, 57, 58, 59, 60, 62, 62, 62, 62, 36, 38,
+        40, 42, 45, 47, 49, 51, 53, 52, 51, 51, 50, 50, 50, 50, 51, 51, 52, 53,
+        54, 55, 56, 57, 58, 59, 60, 61, 62, 62, 62, 62, 40, 41, 43, 45, 47, 49,
+        50, 52, 54, 54, 53, 52, 52, 52, 52, 52, 52, 53, 54, 54, 55, 56, 57, 58,
+        59, 60, 61, 62, 63, 63, 63, 63, 45, 46, 47, 48, 49, 51, 52, 54, 56, 55,
+        55, 54, 54, 54, 54, 54, 54, 55, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63,
+        64, 64, 64, 64, 51, 51, 51, 51, 52, 53, 54, 56, 57, 57, 57, 56, 56, 56,
+        56, 56, 56, 56, 57, 57, 58, 59, 59, 60, 61, 62, 63, 64, 65, 65, 65, 65,
+        51, 51, 51, 51, 51, 52, 54, 55, 57, 57, 57, 57, 57, 57, 57, 57, 57, 58,
+        58, 59, 59, 60, 61, 61, 62, 63, 64, 65, 66, 66, 66, 66, 52, 51, 51, 50,
+        50, 51, 53, 55, 57, 57, 57, 58, 58, 58, 59, 59, 59, 59, 60, 60, 61, 61,
+        62, 63, 64, 64, 65, 66, 67, 67, 67, 67, 52, 52, 51, 50, 49, 51, 52, 54,
+        56, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62, 62, 63, 64, 64, 65, 66,
+        66, 67, 68, 68, 68, 68, 53, 52, 50, 49, 48, 50, 52, 54, 56, 57, 58, 59,
+        61, 61, 61, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67, 68, 69, 69, 69,
+        69, 69, 54, 52, 51, 50, 48, 50, 52, 54, 56, 57, 58, 60, 61, 62, 62, 63,
+        63, 64, 64, 65, 65, 66, 66, 67, 68, 68, 69, 70, 70, 70, 70, 70, 54, 53,
+        51, 50, 49, 50, 52, 54, 56, 57, 59, 60, 61, 62, 63, 64, 64, 65, 66, 66,
+        67, 67, 68, 68, 69, 70, 70, 71, 72, 72, 72, 72, 55, 53, 52, 50, 49, 50,
+        52, 54, 56, 57, 59, 60, 62, 63, 64, 65, 66, 66, 67, 67, 68, 69, 69, 70,
+        70, 71, 71, 72, 73, 73, 73, 73, 55, 54, 52, 51, 49, 51, 52, 54, 56, 57,
+        59, 61, 62, 63, 64, 66, 67, 67, 68, 69, 69, 70, 70, 71, 72, 72, 73, 73,
+        74, 74, 74, 74, 56, 55, 53, 51, 50, 51, 53, 55, 56, 58, 59, 61, 63, 64,
+        65, 66, 67, 68, 69, 70, 70, 71, 71, 72, 73, 73, 74, 74, 75, 75, 75, 75,
+        57, 56, 54, 52, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 66, 67, 68, 69,
+        70, 70, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 76, 76, 58, 57, 55, 53,
+        52, 53, 54, 56, 57, 59, 60, 62, 64, 65, 66, 67, 69, 70, 70, 71, 72, 73,
+        73, 74, 75, 75, 76, 77, 77, 77, 77, 77, 59, 58, 56, 54, 52, 54, 55, 56,
+        58, 59, 61, 62, 64, 65, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 76,
+        77, 78, 78, 78, 78, 78, 61, 59, 57, 55, 53, 55, 56, 57, 59, 60, 61, 63,
+        65, 66, 67, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 79, 79,
+        79, 79, 62, 60, 58, 56, 55, 56, 57, 58, 59, 61, 62, 64, 65, 66, 68, 69,
+        70, 71, 72, 73, 75, 75, 76, 77, 77, 78, 79, 79, 80, 80, 80, 80, 63, 61,
+        59, 57, 56, 57, 58, 59, 60, 61, 63, 64, 66, 67, 68, 70, 71, 72, 73, 74,
+        75, 76, 77, 77, 78, 79, 79, 80, 81, 81, 81, 81, 64, 62, 60, 58, 57, 58,
+        59, 60, 61, 62, 64, 65, 66, 68, 69, 70, 72, 73, 74, 75, 76, 77, 77, 78,
+        79, 80, 80, 81, 82, 82, 82, 82, 65, 63, 61, 60, 58, 59, 60, 61, 62, 63,
+        64, 66, 67, 68, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 80, 81, 82,
+        82, 82, 82, 82, 67, 64, 63, 61, 59, 60, 61, 62, 63, 64, 65, 66, 68, 69,
+        70, 71, 73, 74, 75, 76, 77, 78, 79, 79, 80, 81, 82, 82, 83, 83, 83, 83,
+        68, 66, 64, 62, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74,
+        75, 77, 78, 78, 79, 80, 81, 82, 82, 83, 84, 84, 84, 84, 69, 67, 65, 63,
+        62, 62, 63, 64, 65, 66, 67, 68, 69, 70, 72, 73, 74, 75, 76, 77, 78, 79,
+        80, 81, 82, 82, 83, 84, 84, 84, 84, 84, 69, 67, 65, 63, 62, 62, 63, 64,
+        65, 66, 67, 68, 69, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 82,
+        83, 84, 84, 84, 84, 84, 69, 67, 65, 63, 62, 62, 63, 64, 65, 66, 67, 68,
+        69, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 82, 83, 84, 84, 84,
+        84, 84, 69, 67, 65, 63, 62, 62, 63, 64, 65, 66, 67, 68, 69, 70, 72, 73,
+        74, 75, 76, 77, 78, 79, 80, 81, 82, 82, 83, 84, 84, 84, 84, 84 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 68, 92, 113, 68, 87, 103, 117, 92, 103, 119, 128, 113, 117, 128,
+        134,
+        /* Size 8 */
+        64, 53, 56, 67, 79, 91, 100, 107, 53, 58, 56, 63, 73, 84, 93, 101, 56,
+        56, 70, 77, 83, 91, 98, 105, 67, 63, 77, 87, 94, 99, 104, 109, 79, 73,
+        83, 94, 101, 106, 110, 113, 91, 84, 91, 99, 106, 110, 114, 116, 100, 93,
+        98, 104, 110, 114, 117, 119, 107, 101, 105, 109, 113, 116, 119, 121,
+        /* Size 16 */
+        64, 58, 53, 54, 56, 61, 67, 72, 79, 84, 91, 95, 100, 103, 107, 107, 58,
+        57, 55, 56, 56, 60, 65, 70, 76, 81, 87, 91, 96, 100, 104, 104, 53, 55,
+        58, 57, 56, 59, 63, 67, 73, 78, 84, 88, 93, 97, 101, 101, 54, 56, 57,
+        60, 63, 66, 69, 73, 78, 82, 87, 91, 96, 99, 103, 103, 56, 56, 56, 63,
+        70, 73, 77, 80, 83, 87, 91, 94, 98, 101, 105, 105, 61, 60, 59, 66, 73,
+        77, 81, 85, 88, 91, 95, 98, 101, 104, 107, 107, 67, 65, 63, 69, 77, 81,
+        87, 90, 94, 96, 99, 102, 104, 106, 109, 109, 72, 70, 67, 73, 80, 85, 90,
+        93, 97, 100, 102, 104, 107, 109, 111, 111, 79, 76, 73, 78, 83, 88, 94,
+        97, 101, 103, 106, 108, 110, 111, 113, 113, 84, 81, 78, 82, 87, 91, 96,
+        100, 103, 105, 108, 110, 112, 113, 115, 115, 91, 87, 84, 87, 91, 95, 99,
+        102, 106, 108, 110, 112, 114, 115, 116, 116, 95, 91, 88, 91, 94, 98,
+        102, 104, 108, 110, 112, 114, 115, 116, 118, 118, 100, 96, 93, 96, 98,
+        101, 104, 107, 110, 112, 114, 115, 117, 118, 119, 119, 103, 100, 97, 99,
+        101, 104, 106, 109, 111, 113, 115, 116, 118, 119, 120, 120, 107, 104,
+        101, 103, 105, 107, 109, 111, 113, 115, 116, 118, 119, 120, 121, 121,
+        107, 104, 101, 103, 105, 107, 109, 111, 113, 115, 116, 118, 119, 120,
+        121, 121,
+        /* Size 32 */
+        64, 61, 58, 55, 53, 54, 54, 55, 56, 58, 61, 64, 67, 70, 72, 76, 79, 82,
+        84, 87, 91, 93, 95, 97, 100, 102, 103, 105, 107, 107, 107, 107, 61, 59,
+        57, 56, 54, 55, 55, 56, 56, 58, 60, 63, 66, 68, 71, 74, 78, 80, 83, 86,
+        89, 91, 93, 96, 98, 100, 102, 104, 106, 106, 106, 106, 58, 57, 57, 56,
+        55, 56, 56, 56, 56, 58, 60, 62, 65, 67, 70, 73, 76, 78, 81, 84, 87, 89,
+        91, 94, 96, 98, 100, 102, 104, 104, 104, 104, 55, 56, 56, 56, 57, 57,
+        57, 56, 56, 58, 60, 62, 64, 66, 69, 71, 74, 77, 79, 82, 85, 87, 90, 92,
+        95, 97, 99, 101, 103, 103, 103, 103, 53, 54, 55, 57, 58, 58, 57, 57, 56,
+        58, 59, 61, 63, 65, 67, 70, 73, 75, 78, 81, 84, 86, 88, 91, 93, 95, 97,
+        99, 101, 101, 101, 101, 54, 55, 56, 57, 58, 58, 59, 59, 59, 61, 62, 64,
+        66, 68, 70, 73, 75, 77, 80, 82, 85, 87, 90, 92, 94, 96, 98, 100, 102,
+        102, 102, 102, 54, 55, 56, 57, 57, 59, 60, 61, 63, 64, 66, 67, 69, 71,
+        73, 75, 78, 80, 82, 84, 87, 89, 91, 93, 96, 97, 99, 101, 103, 103, 103,
+        103, 55, 56, 56, 56, 57, 59, 61, 63, 66, 68, 69, 71, 73, 74, 76, 78, 80,
+        82, 84, 87, 89, 91, 93, 95, 97, 99, 100, 102, 104, 104, 104, 104, 56,
+        56, 56, 56, 56, 59, 63, 66, 70, 72, 73, 75, 77, 78, 80, 81, 83, 85, 87,
+        89, 91, 93, 94, 96, 98, 100, 101, 103, 105, 105, 105, 105, 58, 58, 58,
+        58, 58, 61, 64, 68, 72, 73, 75, 77, 79, 80, 82, 84, 86, 87, 89, 91, 93,
+        94, 96, 98, 100, 101, 103, 104, 106, 106, 106, 106, 61, 60, 60, 60, 59,
+        62, 66, 69, 73, 75, 77, 79, 81, 83, 85, 86, 88, 90, 91, 93, 95, 96, 98,
+        99, 101, 102, 104, 105, 107, 107, 107, 107, 64, 63, 62, 62, 61, 64, 67,
+        71, 75, 77, 79, 82, 84, 86, 87, 89, 91, 92, 94, 95, 97, 98, 100, 101,
+        103, 104, 105, 106, 108, 108, 108, 108, 67, 66, 65, 64, 63, 66, 69, 73,
+        77, 79, 81, 84, 87, 88, 90, 92, 94, 95, 96, 98, 99, 100, 102, 103, 104,
+        105, 106, 108, 109, 109, 109, 109, 70, 68, 67, 66, 65, 68, 71, 74, 78,
+        80, 83, 86, 88, 90, 92, 93, 95, 97, 98, 99, 101, 102, 103, 104, 106,
+        107, 108, 109, 110, 110, 110, 110, 72, 71, 70, 69, 67, 70, 73, 76, 80,
+        82, 85, 87, 90, 92, 93, 95, 97, 98, 100, 101, 102, 103, 104, 106, 107,
+        108, 109, 110, 111, 111, 111, 111, 76, 74, 73, 71, 70, 73, 75, 78, 81,
+        84, 86, 89, 92, 93, 95, 97, 99, 100, 101, 103, 104, 105, 106, 107, 108,
+        109, 110, 111, 112, 112, 112, 112, 79, 78, 76, 74, 73, 75, 78, 80, 83,
+        86, 88, 91, 94, 95, 97, 99, 101, 102, 103, 104, 106, 107, 108, 109, 110,
+        110, 111, 112, 113, 113, 113, 113, 82, 80, 78, 77, 75, 77, 80, 82, 85,
+        87, 90, 92, 95, 97, 98, 100, 102, 103, 104, 106, 107, 108, 109, 110,
+        111, 111, 112, 113, 114, 114, 114, 114, 84, 83, 81, 79, 78, 80, 82, 84,
+        87, 89, 91, 94, 96, 98, 100, 101, 103, 104, 105, 107, 108, 109, 110,
+        111, 112, 112, 113, 114, 115, 115, 115, 115, 87, 86, 84, 82, 81, 82, 84,
+        87, 89, 91, 93, 95, 98, 99, 101, 103, 104, 106, 107, 108, 109, 110, 111,
+        112, 113, 113, 114, 115, 116, 116, 116, 116, 91, 89, 87, 85, 84, 85, 87,
+        89, 91, 93, 95, 97, 99, 101, 102, 104, 106, 107, 108, 109, 110, 111,
+        112, 113, 114, 114, 115, 116, 116, 116, 116, 116, 93, 91, 89, 87, 86,
+        87, 89, 91, 93, 94, 96, 98, 100, 102, 103, 105, 107, 108, 109, 110, 111,
+        112, 113, 114, 114, 115, 116, 116, 117, 117, 117, 117, 95, 93, 91, 90,
+        88, 90, 91, 93, 94, 96, 98, 100, 102, 103, 104, 106, 108, 109, 110, 111,
+        112, 113, 114, 114, 115, 116, 116, 117, 118, 118, 118, 118, 97, 96, 94,
+        92, 91, 92, 93, 95, 96, 98, 99, 101, 103, 104, 106, 107, 109, 110, 111,
+        112, 113, 114, 114, 115, 116, 117, 117, 118, 118, 118, 118, 118, 100,
+        98, 96, 95, 93, 94, 96, 97, 98, 100, 101, 103, 104, 106, 107, 108, 110,
+        111, 112, 113, 114, 114, 115, 116, 117, 117, 118, 118, 119, 119, 119,
+        119, 102, 100, 98, 97, 95, 96, 97, 99, 100, 101, 102, 104, 105, 107,
+        108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 117, 118, 118, 119,
+        119, 119, 119, 119, 103, 102, 100, 99, 97, 98, 99, 100, 101, 103, 104,
+        105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 116, 117, 118,
+        118, 119, 119, 120, 120, 120, 120, 105, 104, 102, 101, 99, 100, 101,
+        102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116,
+        116, 117, 118, 118, 119, 119, 120, 120, 120, 120, 120, 107, 106, 104,
+        103, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
+        114, 115, 116, 116, 117, 118, 118, 119, 119, 120, 120, 121, 121, 121,
+        121, 107, 106, 104, 103, 101, 102, 103, 104, 105, 106, 107, 108, 109,
+        110, 111, 112, 113, 114, 115, 116, 116, 117, 118, 118, 119, 119, 120,
+        120, 121, 121, 121, 121, 107, 106, 104, 103, 101, 102, 103, 104, 105,
+        106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 116, 117, 118,
+        118, 119, 119, 120, 120, 121, 121, 121, 121, 107, 106, 104, 103, 101,
+        102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
+        116, 116, 117, 118, 118, 119, 119, 120, 120, 121, 121, 121, 121 },
+      { /* Intra matrices */
+        /* Size 4 */
+        38, 40, 55, 69, 40, 52, 62, 72, 55, 62, 73, 79, 69, 72, 79, 83,
+        /* Size 8 */
+        43, 35, 37, 45, 53, 62, 68, 74, 35, 39, 37, 42, 49, 56, 64, 70, 37, 37,
+        47, 52, 56, 62, 67, 72, 45, 42, 52, 59, 64, 68, 72, 75, 53, 49, 56, 64,
+        69, 73, 76, 78, 62, 56, 62, 68, 73, 76, 79, 81, 68, 64, 67, 72, 76, 79,
+        81, 83, 74, 70, 72, 75, 78, 81, 83, 84,
+        /* Size 16 */
+        42, 38, 34, 35, 36, 40, 44, 48, 52, 56, 60, 63, 67, 70, 72, 72, 38, 37,
+        36, 36, 36, 39, 42, 46, 50, 54, 58, 61, 65, 67, 70, 70, 34, 36, 38, 37,
+        37, 39, 41, 44, 48, 51, 55, 59, 62, 65, 68, 68, 35, 36, 37, 39, 41, 43,
+        45, 48, 51, 54, 58, 61, 64, 67, 69, 69, 36, 36, 37, 41, 46, 48, 50, 53,
+        55, 58, 61, 63, 66, 68, 70, 70, 40, 39, 39, 43, 48, 51, 54, 56, 59, 61,
+        63, 66, 68, 70, 72, 72, 44, 42, 41, 45, 50, 54, 58, 60, 62, 64, 66, 68,
+        70, 72, 74, 74, 48, 46, 44, 48, 53, 56, 60, 62, 65, 67, 69, 70, 72, 74,
+        75, 75, 52, 50, 48, 51, 55, 59, 62, 65, 68, 69, 71, 73, 74, 75, 77, 77,
+        56, 54, 51, 54, 58, 61, 64, 67, 69, 71, 73, 74, 76, 77, 78, 78, 60, 58,
+        55, 58, 61, 63, 66, 69, 71, 73, 75, 76, 77, 78, 79, 79, 63, 61, 59, 61,
+        63, 66, 68, 70, 73, 74, 76, 77, 78, 79, 80, 80, 67, 65, 62, 64, 66, 68,
+        70, 72, 74, 76, 77, 78, 79, 80, 81, 81, 70, 67, 65, 67, 68, 70, 72, 74,
+        75, 77, 78, 79, 80, 81, 82, 82, 72, 70, 68, 69, 70, 72, 74, 75, 77, 78,
+        79, 80, 81, 82, 82, 82, 72, 70, 68, 69, 70, 72, 74, 75, 77, 78, 79, 80,
+        81, 82, 82, 82,
+        /* Size 32 */
+        41, 39, 37, 35, 34, 34, 35, 35, 36, 37, 39, 41, 43, 45, 47, 49, 52, 54,
+        55, 57, 60, 61, 63, 65, 66, 68, 69, 70, 72, 72, 72, 72, 39, 38, 37, 36,
+        35, 35, 35, 36, 36, 37, 39, 41, 43, 44, 46, 48, 51, 52, 54, 56, 58, 60,
+        62, 63, 65, 66, 68, 69, 70, 70, 70, 70, 37, 37, 36, 36, 36, 36, 36, 36,
+        36, 37, 39, 40, 42, 43, 45, 47, 49, 51, 53, 55, 57, 59, 60, 62, 64, 65,
+        67, 68, 69, 69, 69, 69, 35, 36, 36, 36, 36, 36, 36, 36, 36, 37, 38, 40,
+        41, 43, 44, 46, 48, 50, 52, 54, 56, 57, 59, 61, 63, 64, 65, 67, 68, 68,
+        68, 68, 34, 35, 36, 36, 37, 37, 37, 37, 36, 37, 38, 39, 40, 42, 44, 45,
+        47, 49, 51, 53, 55, 56, 58, 60, 62, 63, 64, 66, 67, 67, 67, 67, 34, 35,
+        36, 36, 37, 37, 38, 38, 38, 39, 40, 41, 42, 44, 45, 47, 49, 51, 52, 54,
+        56, 57, 59, 61, 62, 64, 65, 66, 68, 68, 68, 68, 35, 35, 36, 36, 37, 38,
+        38, 39, 40, 41, 42, 44, 45, 46, 48, 49, 51, 52, 54, 55, 57, 59, 60, 62,
+        63, 65, 66, 67, 69, 69, 69, 69, 35, 36, 36, 36, 37, 38, 39, 41, 43, 44,
+        45, 46, 47, 48, 50, 51, 53, 54, 55, 57, 59, 60, 61, 63, 64, 65, 67, 68,
+        69, 69, 69, 69, 36, 36, 36, 36, 36, 38, 40, 43, 45, 46, 48, 49, 50, 51,
+        52, 53, 55, 56, 57, 58, 60, 61, 62, 64, 65, 66, 67, 69, 70, 70, 70, 70,
+        37, 37, 37, 37, 37, 39, 41, 44, 46, 48, 49, 50, 52, 53, 54, 55, 56, 57,
+        59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 71, 71, 71, 71, 39, 39, 39, 38,
+        38, 40, 42, 45, 48, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 61, 63, 64,
+        65, 66, 67, 68, 69, 70, 71, 71, 71, 71, 41, 41, 40, 40, 39, 41, 44, 46,
+        49, 50, 52, 53, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
+        70, 71, 72, 72, 72, 72, 43, 43, 42, 41, 40, 42, 45, 47, 50, 52, 53, 55,
+        57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 68, 69, 70, 71, 72, 73, 73,
+        73, 73, 45, 44, 43, 43, 42, 44, 46, 48, 51, 53, 54, 56, 58, 59, 61, 62,
+        63, 64, 65, 66, 67, 68, 69, 69, 70, 71, 72, 73, 74, 74, 74, 74, 47, 46,
+        45, 44, 44, 45, 48, 50, 52, 54, 56, 57, 59, 61, 62, 63, 64, 65, 66, 67,
+        68, 69, 70, 71, 71, 72, 73, 74, 74, 74, 74, 74, 49, 48, 47, 46, 45, 47,
+        49, 51, 53, 55, 57, 59, 61, 62, 63, 64, 66, 66, 67, 68, 69, 70, 71, 72,
+        72, 73, 74, 74, 75, 75, 75, 75, 52, 51, 49, 48, 47, 49, 51, 53, 55, 56,
+        58, 60, 62, 63, 64, 66, 67, 68, 69, 70, 71, 71, 72, 73, 73, 74, 75, 75,
+        76, 76, 76, 76, 54, 52, 51, 50, 49, 51, 52, 54, 56, 57, 59, 61, 63, 64,
+        65, 66, 68, 69, 70, 70, 71, 72, 73, 73, 74, 75, 75, 76, 76, 76, 76, 76,
+        55, 54, 53, 52, 51, 52, 54, 55, 57, 59, 60, 62, 64, 65, 66, 67, 69, 70,
+        70, 71, 72, 73, 73, 74, 75, 75, 76, 77, 77, 77, 77, 77, 57, 56, 55, 54,
+        53, 54, 55, 57, 58, 60, 61, 63, 65, 66, 67, 68, 70, 70, 71, 72, 73, 74,
+        74, 75, 76, 76, 77, 77, 78, 78, 78, 78, 60, 58, 57, 56, 55, 56, 57, 59,
+        60, 61, 63, 64, 66, 67, 68, 69, 71, 71, 72, 73, 74, 75, 75, 76, 76, 77,
+        77, 78, 78, 78, 78, 78, 61, 60, 59, 57, 56, 57, 59, 60, 61, 62, 64, 65,
+        67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
+        79, 79, 63, 62, 60, 59, 58, 59, 60, 61, 62, 64, 65, 66, 68, 69, 70, 71,
+        72, 73, 73, 74, 75, 76, 76, 77, 77, 78, 78, 79, 79, 79, 79, 79, 65, 63,
+        62, 61, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73, 73, 74, 75,
+        76, 76, 77, 77, 78, 78, 79, 79, 80, 80, 80, 80, 66, 65, 64, 63, 62, 62,
+        63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 76, 77, 77, 78,
+        79, 79, 79, 80, 80, 80, 80, 80, 68, 66, 65, 64, 63, 64, 65, 65, 66, 67,
+        68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 79, 79, 80, 80,
+        81, 81, 81, 81, 69, 68, 67, 65, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72,
+        73, 74, 75, 75, 76, 77, 77, 78, 78, 79, 79, 80, 80, 81, 81, 81, 81, 81,
+        70, 69, 68, 67, 66, 66, 67, 68, 69, 69, 70, 71, 72, 73, 74, 74, 75, 76,
+        77, 77, 78, 78, 79, 79, 80, 80, 81, 81, 81, 81, 81, 81, 72, 70, 69, 68,
+        67, 68, 69, 69, 70, 71, 71, 72, 73, 74, 74, 75, 76, 76, 77, 78, 78, 79,
+        79, 80, 80, 81, 81, 81, 82, 82, 82, 82, 72, 70, 69, 68, 67, 68, 69, 69,
+        70, 71, 71, 72, 73, 74, 74, 75, 76, 76, 77, 78, 78, 79, 79, 80, 80, 81,
+        81, 81, 82, 82, 82, 82, 72, 70, 69, 68, 67, 68, 69, 69, 70, 71, 71, 72,
+        73, 74, 74, 75, 76, 76, 77, 78, 78, 79, 79, 80, 80, 81, 81, 81, 82, 82,
+        82, 82, 72, 70, 69, 68, 67, 68, 69, 69, 70, 71, 71, 72, 73, 74, 74, 75,
+        76, 76, 77, 78, 78, 79, 79, 80, 80, 81, 81, 81, 82, 82, 82, 82 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 86, 90, 101, 86, 95, 98, 105, 90, 98, 110, 117, 101, 105, 117, 125,
+        /* Size 8 */
+        64, 55, 81, 84, 87, 92, 98, 104, 55, 71, 82, 77, 79, 83, 89, 95, 81, 82,
+        89, 88, 88, 90, 94, 99, 84, 77, 88, 94, 96, 98, 101, 104, 87, 79, 88,
+        96, 101, 104, 107, 110, 92, 83, 90, 98, 104, 109, 112, 115, 98, 89, 94,
+        101, 107, 112, 116, 118, 104, 95, 99, 104, 110, 115, 118, 121,
+        /* Size 16 */
+        64, 59, 55, 66, 81, 82, 84, 85, 87, 90, 92, 95, 98, 101, 104, 104, 59,
+        61, 62, 70, 82, 81, 80, 82, 83, 85, 87, 90, 93, 96, 99, 99, 55, 62, 71,
+        76, 82, 80, 77, 78, 79, 81, 83, 86, 89, 92, 95, 95, 66, 70, 76, 80, 86,
+        84, 82, 83, 83, 85, 86, 89, 91, 94, 97, 97, 81, 82, 82, 86, 89, 89, 88,
+        88, 88, 89, 90, 92, 94, 96, 99, 99, 82, 81, 80, 84, 89, 90, 91, 91, 92,
+        93, 94, 96, 97, 99, 101, 101, 84, 80, 77, 82, 88, 91, 94, 95, 96, 97,
+        98, 99, 101, 102, 104, 104, 85, 82, 78, 83, 88, 91, 95, 97, 98, 100,
+        101, 102, 104, 105, 107, 107, 87, 83, 79, 83, 88, 92, 96, 98, 101, 103,
+        104, 106, 107, 108, 110, 110, 90, 85, 81, 85, 89, 93, 97, 100, 103, 105,
+        107, 108, 109, 111, 112, 112, 92, 87, 83, 86, 90, 94, 98, 101, 104, 107,
+        109, 110, 112, 113, 115, 115, 95, 90, 86, 89, 92, 96, 99, 102, 106, 108,
+        110, 112, 114, 115, 116, 116, 98, 93, 89, 91, 94, 97, 101, 104, 107,
+        109, 112, 114, 116, 117, 118, 118, 101, 96, 92, 94, 96, 99, 102, 105,
+        108, 111, 113, 115, 117, 118, 120, 120, 104, 99, 95, 97, 99, 101, 104,
+        107, 110, 112, 115, 116, 118, 120, 121, 121, 104, 99, 95, 97, 99, 101,
+        104, 107, 110, 112, 115, 116, 118, 120, 121, 121,
+        /* Size 32 */
+        64, 62, 59, 57, 55, 60, 66, 73, 81, 82, 82, 83, 84, 85, 85, 86, 87, 88,
+        90, 91, 92, 94, 95, 97, 98, 100, 101, 102, 104, 104, 104, 104, 62, 61,
+        60, 59, 58, 63, 68, 74, 81, 82, 82, 82, 82, 83, 83, 84, 85, 86, 87, 88,
+        90, 91, 93, 94, 96, 97, 98, 100, 101, 101, 101, 101, 59, 60, 61, 61, 62,
+        66, 70, 76, 82, 81, 81, 81, 80, 81, 82, 82, 83, 84, 85, 86, 87, 89, 90,
+        92, 93, 95, 96, 98, 99, 99, 99, 99, 57, 59, 61, 64, 66, 69, 73, 77, 82,
+        81, 80, 80, 79, 79, 80, 80, 81, 82, 83, 84, 85, 87, 88, 89, 91, 92, 94,
+        95, 97, 97, 97, 97, 55, 58, 62, 66, 71, 73, 76, 79, 82, 81, 80, 78, 77,
+        78, 78, 78, 79, 80, 81, 82, 83, 84, 86, 87, 89, 90, 92, 93, 95, 95, 95,
+        95, 60, 63, 66, 69, 73, 76, 78, 81, 84, 83, 82, 81, 80, 80, 80, 81, 81,
+        82, 83, 84, 85, 86, 87, 89, 90, 91, 93, 94, 96, 96, 96, 96, 66, 68, 70,
+        73, 76, 78, 80, 83, 86, 85, 84, 83, 82, 82, 83, 83, 83, 84, 85, 86, 86,
+        88, 89, 90, 91, 93, 94, 95, 97, 97, 97, 97, 73, 74, 76, 77, 79, 81, 83,
+        85, 87, 87, 86, 86, 85, 85, 85, 85, 85, 86, 87, 87, 88, 89, 90, 92, 93,
+        94, 95, 96, 98, 98, 98, 98, 81, 81, 82, 82, 82, 84, 86, 87, 89, 89, 89,
+        88, 88, 88, 88, 88, 88, 88, 89, 89, 90, 91, 92, 93, 94, 95, 96, 98, 99,
+        99, 99, 99, 82, 82, 81, 81, 81, 83, 85, 87, 89, 89, 89, 89, 89, 89, 89,
+        89, 90, 90, 91, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 100, 100, 100,
+        82, 82, 81, 80, 80, 82, 84, 86, 89, 89, 90, 90, 91, 91, 91, 91, 92, 92,
+        93, 93, 94, 95, 96, 96, 97, 98, 99, 100, 101, 101, 101, 101, 83, 82, 81,
+        80, 78, 81, 83, 86, 88, 89, 90, 91, 92, 92, 93, 93, 94, 94, 95, 95, 96,
+        97, 97, 98, 99, 100, 101, 102, 103, 103, 103, 103, 84, 82, 80, 79, 77,
+        80, 82, 85, 88, 89, 91, 92, 94, 94, 95, 95, 96, 96, 97, 97, 98, 99, 99,
+        100, 101, 102, 102, 103, 104, 104, 104, 104, 85, 83, 81, 79, 78, 80, 82,
+        85, 88, 89, 91, 92, 94, 95, 96, 96, 97, 98, 98, 99, 99, 100, 101, 102,
+        102, 103, 104, 105, 106, 106, 106, 106, 85, 83, 82, 80, 78, 80, 83, 85,
+        88, 89, 91, 93, 95, 96, 97, 98, 98, 99, 100, 100, 101, 102, 102, 103,
+        104, 105, 105, 106, 107, 107, 107, 107, 86, 84, 82, 80, 78, 81, 83, 85,
+        88, 89, 91, 93, 95, 96, 98, 99, 100, 100, 101, 102, 103, 103, 104, 105,
+        105, 106, 107, 108, 108, 108, 108, 108, 87, 85, 83, 81, 79, 81, 83, 85,
+        88, 90, 92, 94, 96, 97, 98, 100, 101, 102, 103, 104, 104, 105, 106, 106,
+        107, 108, 108, 109, 110, 110, 110, 110, 88, 86, 84, 82, 80, 82, 84, 86,
+        88, 90, 92, 94, 96, 98, 99, 100, 102, 103, 104, 105, 105, 106, 107, 108,
+        108, 109, 110, 110, 111, 111, 111, 111, 90, 87, 85, 83, 81, 83, 85, 87,
+        89, 91, 93, 95, 97, 98, 100, 101, 103, 104, 105, 106, 107, 107, 108,
+        109, 109, 110, 111, 111, 112, 112, 112, 112, 91, 88, 86, 84, 82, 84, 86,
+        87, 89, 91, 93, 95, 97, 99, 100, 102, 104, 105, 106, 107, 108, 108, 109,
+        110, 111, 111, 112, 113, 113, 113, 113, 113, 92, 90, 87, 85, 83, 85, 86,
+        88, 90, 92, 94, 96, 98, 99, 101, 103, 104, 105, 107, 108, 109, 110, 110,
+        111, 112, 113, 113, 114, 115, 115, 115, 115, 94, 91, 89, 87, 84, 86, 88,
+        89, 91, 93, 95, 97, 99, 100, 102, 103, 105, 106, 107, 108, 110, 110,
+        111, 112, 113, 114, 114, 115, 116, 116, 116, 116, 95, 93, 90, 88, 86,
+        87, 89, 90, 92, 94, 96, 97, 99, 101, 102, 104, 106, 107, 108, 109, 110,
+        111, 112, 113, 114, 114, 115, 116, 116, 116, 116, 116, 97, 94, 92, 89,
+        87, 89, 90, 92, 93, 95, 96, 98, 100, 102, 103, 105, 106, 108, 109, 110,
+        111, 112, 113, 114, 115, 115, 116, 117, 117, 117, 117, 117, 98, 96, 93,
+        91, 89, 90, 91, 93, 94, 96, 97, 99, 101, 102, 104, 105, 107, 108, 109,
+        111, 112, 113, 114, 115, 116, 116, 117, 118, 118, 118, 118, 118, 100,
+        97, 95, 92, 90, 91, 93, 94, 95, 97, 98, 100, 102, 103, 105, 106, 108,
+        109, 110, 111, 113, 114, 114, 115, 116, 117, 118, 118, 119, 119, 119,
+        119, 101, 98, 96, 94, 92, 93, 94, 95, 96, 98, 99, 101, 102, 104, 105,
+        107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 118, 119, 120,
+        120, 120, 120, 102, 100, 98, 95, 93, 94, 95, 96, 98, 99, 100, 102, 103,
+        105, 106, 108, 109, 110, 111, 113, 114, 115, 116, 117, 118, 118, 119,
+        120, 121, 121, 121, 121, 104, 101, 99, 97, 95, 96, 97, 98, 99, 100, 101,
+        103, 104, 106, 107, 108, 110, 111, 112, 113, 115, 116, 116, 117, 118,
+        119, 120, 121, 121, 121, 121, 121, 104, 101, 99, 97, 95, 96, 97, 98, 99,
+        100, 101, 103, 104, 106, 107, 108, 110, 111, 112, 113, 115, 116, 116,
+        117, 118, 119, 120, 121, 121, 121, 121, 121, 104, 101, 99, 97, 95, 96,
+        97, 98, 99, 100, 101, 103, 104, 106, 107, 108, 110, 111, 112, 113, 115,
+        116, 116, 117, 118, 119, 120, 121, 121, 121, 121, 121, 104, 101, 99, 97,
+        95, 96, 97, 98, 99, 100, 101, 103, 104, 106, 107, 108, 110, 111, 112,
+        113, 115, 116, 116, 117, 118, 119, 120, 121, 121, 121, 121, 121 },
+      { /* Intra matrices */
+        /* Size 4 */
+        40, 54, 57, 65, 54, 60, 62, 67, 57, 62, 71, 75, 65, 67, 75, 81,
+        /* Size 8 */
+        42, 36, 54, 56, 58, 62, 66, 70, 36, 47, 54, 51, 52, 55, 59, 64, 54, 54,
+        60, 59, 58, 60, 63, 66, 56, 51, 59, 63, 64, 66, 68, 70, 58, 52, 58, 64,
+        68, 70, 72, 74, 62, 55, 60, 66, 70, 74, 76, 78, 66, 59, 63, 68, 72, 76,
+        79, 81, 70, 64, 66, 70, 74, 78, 81, 83,
+        /* Size 16 */
+        41, 38, 35, 43, 53, 54, 55, 56, 57, 59, 61, 63, 65, 67, 69, 69, 38, 39,
+        40, 46, 53, 53, 53, 53, 54, 56, 57, 59, 62, 64, 66, 66, 35, 40, 46, 49,
+        54, 52, 51, 51, 51, 53, 54, 56, 58, 60, 63, 63, 43, 46, 49, 53, 56, 55,
+        54, 54, 54, 56, 57, 58, 60, 62, 64, 64, 53, 53, 54, 56, 59, 58, 58, 58,
+        58, 58, 59, 61, 62, 64, 65, 65, 54, 53, 52, 55, 58, 59, 60, 60, 60, 61,
+        62, 63, 64, 66, 67, 67, 55, 53, 51, 54, 58, 60, 62, 63, 63, 64, 65, 66,
+        67, 68, 69, 69, 56, 53, 51, 54, 58, 60, 63, 64, 65, 66, 67, 68, 69, 70,
+        71, 71, 57, 54, 51, 54, 58, 60, 63, 65, 67, 68, 69, 70, 71, 72, 73, 73,
+        59, 56, 53, 56, 58, 61, 64, 66, 68, 70, 71, 72, 73, 74, 75, 75, 61, 57,
+        54, 57, 59, 62, 65, 67, 69, 71, 73, 74, 75, 76, 77, 77, 63, 59, 56, 58,
+        61, 63, 66, 68, 70, 72, 74, 75, 76, 77, 78, 78, 65, 62, 58, 60, 62, 64,
+        67, 69, 71, 73, 75, 76, 78, 79, 80, 80, 67, 64, 60, 62, 64, 66, 68, 70,
+        72, 74, 76, 77, 79, 80, 81, 81, 69, 66, 63, 64, 65, 67, 69, 71, 73, 75,
+        77, 78, 80, 81, 82, 82, 69, 66, 63, 64, 65, 67, 69, 71, 73, 75, 77, 78,
+        80, 81, 82, 82,
+        /* Size 32 */
+        41, 39, 38, 37, 35, 38, 42, 47, 53, 53, 54, 54, 55, 55, 56, 56, 57, 58,
+        59, 59, 60, 61, 62, 63, 65, 65, 66, 68, 69, 69, 69, 69, 39, 39, 38, 38,
+        37, 40, 44, 48, 53, 53, 53, 53, 53, 54, 54, 55, 55, 56, 57, 58, 59, 60,
+        61, 62, 63, 64, 65, 66, 67, 67, 67, 67, 38, 38, 39, 39, 40, 42, 45, 49,
+        53, 53, 53, 52, 52, 53, 53, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61, 62,
+        63, 64, 65, 65, 65, 65, 37, 38, 39, 41, 42, 45, 47, 50, 53, 53, 52, 52,
+        51, 51, 52, 52, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 63, 64, 64,
+        64, 64, 35, 37, 40, 42, 46, 47, 49, 51, 53, 52, 52, 51, 50, 50, 51, 51,
+        51, 52, 53, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 62, 62, 62, 38, 40,
+        42, 45, 47, 49, 51, 52, 54, 54, 53, 52, 52, 52, 52, 52, 52, 53, 54, 54,
+        55, 56, 57, 58, 59, 60, 61, 62, 63, 63, 63, 63, 42, 44, 45, 47, 49, 51,
+        52, 54, 56, 55, 55, 54, 53, 54, 54, 54, 54, 55, 55, 56, 56, 57, 58, 59,
+        60, 61, 62, 63, 64, 64, 64, 64, 47, 48, 49, 50, 51, 52, 54, 55, 57, 57,
+        56, 56, 55, 55, 55, 55, 55, 56, 57, 57, 58, 58, 59, 60, 61, 62, 62, 63,
+        64, 64, 64, 64, 53, 53, 53, 53, 53, 54, 56, 57, 58, 58, 58, 58, 57, 57,
+        57, 57, 57, 58, 58, 58, 59, 60, 60, 61, 62, 62, 63, 64, 65, 65, 65, 65,
+        53, 53, 53, 53, 52, 54, 55, 57, 58, 58, 58, 58, 58, 58, 58, 58, 59, 59,
+        59, 60, 60, 61, 61, 62, 63, 64, 64, 65, 66, 66, 66, 66, 54, 53, 53, 52,
+        52, 53, 55, 56, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 61, 61, 61, 62,
+        63, 63, 64, 65, 65, 66, 67, 67, 67, 67, 54, 53, 52, 52, 51, 52, 54, 56,
+        58, 58, 59, 60, 60, 61, 61, 61, 61, 62, 62, 63, 63, 63, 64, 65, 65, 66,
+        66, 67, 68, 68, 68, 68, 55, 53, 52, 51, 50, 52, 53, 55, 57, 58, 59, 60,
+        61, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 66, 66, 67, 68, 68, 69, 69,
+        69, 69, 55, 54, 53, 51, 50, 52, 54, 55, 57, 58, 59, 61, 62, 62, 63, 63,
+        64, 64, 65, 65, 65, 66, 66, 67, 67, 68, 69, 69, 70, 70, 70, 70, 56, 54,
+        53, 52, 51, 52, 54, 55, 57, 58, 60, 61, 62, 63, 63, 64, 65, 65, 66, 66,
+        67, 67, 68, 68, 69, 69, 70, 70, 71, 71, 71, 71, 56, 55, 53, 52, 51, 52,
+        54, 55, 57, 58, 60, 61, 63, 63, 64, 65, 66, 66, 67, 67, 68, 68, 69, 69,
+        70, 70, 71, 71, 72, 72, 72, 72, 57, 55, 54, 52, 51, 52, 54, 55, 57, 59,
+        60, 61, 63, 64, 65, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71, 72, 72,
+        73, 73, 73, 73, 58, 56, 55, 53, 52, 53, 55, 56, 58, 59, 60, 62, 63, 64,
+        65, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 73, 73, 74, 74, 74, 74,
+        59, 57, 55, 54, 53, 54, 55, 57, 58, 59, 61, 62, 64, 65, 66, 67, 68, 68,
+        69, 70, 70, 71, 71, 72, 73, 73, 73, 74, 74, 74, 74, 74, 59, 58, 56, 55,
+        53, 54, 56, 57, 58, 60, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 71, 72,
+        72, 73, 73, 74, 74, 75, 75, 75, 75, 75, 60, 59, 57, 56, 54, 55, 56, 58,
+        59, 60, 61, 63, 64, 65, 67, 68, 69, 70, 70, 71, 72, 73, 73, 74, 74, 75,
+        75, 76, 76, 76, 76, 76, 61, 60, 58, 56, 55, 56, 57, 58, 60, 61, 62, 63,
+        65, 66, 67, 68, 69, 70, 71, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77,
+        77, 77, 62, 61, 59, 57, 56, 57, 58, 59, 60, 61, 63, 64, 65, 66, 68, 69,
+        70, 71, 71, 72, 73, 74, 74, 75, 76, 76, 77, 77, 78, 78, 78, 78, 63, 62,
+        60, 58, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73,
+        74, 74, 75, 76, 76, 77, 77, 78, 78, 78, 78, 78, 65, 63, 61, 59, 58, 59,
+        60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 73, 74, 75, 76, 76,
+        77, 77, 78, 78, 79, 79, 79, 79, 65, 64, 62, 60, 59, 60, 61, 62, 62, 64,
+        65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 79,
+        79, 79, 79, 79, 66, 65, 63, 61, 60, 61, 62, 62, 63, 64, 65, 66, 68, 69,
+        70, 71, 72, 73, 73, 74, 75, 76, 77, 77, 78, 78, 79, 79, 80, 80, 80, 80,
+        68, 66, 64, 63, 61, 62, 63, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
+        74, 75, 76, 76, 77, 78, 78, 79, 79, 80, 81, 81, 81, 81, 69, 67, 65, 64,
+        62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 74, 75, 76, 77,
+        78, 78, 79, 79, 80, 81, 81, 81, 81, 81, 69, 67, 65, 64, 62, 63, 64, 64,
+        65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 74, 75, 76, 77, 78, 78, 79, 79,
+        80, 81, 81, 81, 81, 81, 69, 67, 65, 64, 62, 63, 64, 64, 65, 66, 67, 68,
+        69, 70, 71, 72, 73, 74, 74, 75, 76, 77, 78, 78, 79, 79, 80, 81, 81, 81,
+        81, 81, 69, 67, 65, 64, 62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 72,
+        73, 74, 74, 75, 76, 77, 78, 78, 79, 79, 80, 81, 81, 81, 81, 81 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 67, 88, 105, 67, 84, 97, 108, 88, 97, 109, 116, 105, 108, 116, 120,
+        /* Size 8 */
+        64, 54, 57, 66, 77, 86, 94, 99, 54, 59, 57, 63, 72, 81, 88, 95, 57, 57,
+        69, 75, 80, 87, 92, 97, 66, 63, 75, 83, 89, 93, 97, 101, 77, 72, 80, 89,
+        94, 98, 101, 104, 86, 81, 87, 93, 98, 102, 104, 106, 94, 88, 92, 97,
+        101, 104, 106, 108, 99, 95, 97, 101, 104, 106, 108, 109,
+        /* Size 16 */
+        64, 59, 54, 55, 57, 61, 66, 71, 77, 81, 86, 90, 94, 96, 99, 99, 59, 57,
+        56, 57, 57, 61, 65, 69, 74, 79, 83, 87, 91, 94, 97, 97, 54, 56, 59, 58,
+        57, 60, 63, 67, 72, 76, 81, 84, 88, 91, 95, 95, 55, 57, 58, 60, 63, 65,
+        68, 72, 76, 79, 83, 87, 90, 93, 96, 96, 57, 57, 57, 63, 69, 72, 75, 78,
+        80, 83, 87, 89, 92, 95, 97, 97, 61, 61, 60, 65, 72, 75, 79, 82, 84, 87,
+        90, 92, 95, 97, 99, 99, 66, 65, 63, 68, 75, 79, 83, 86, 89, 91, 93, 95,
+        97, 99, 101, 101, 71, 69, 67, 72, 78, 82, 86, 89, 91, 93, 95, 97, 99,
+        100, 102, 102, 77, 74, 72, 76, 80, 84, 89, 91, 94, 96, 98, 100, 101,
+        102, 104, 104, 81, 79, 76, 79, 83, 87, 91, 93, 96, 98, 100, 101, 103,
+        104, 105, 105, 86, 83, 81, 83, 87, 90, 93, 95, 98, 100, 102, 103, 104,
+        105, 106, 106, 90, 87, 84, 87, 89, 92, 95, 97, 100, 101, 103, 104, 105,
+        106, 107, 107, 94, 91, 88, 90, 92, 95, 97, 99, 101, 103, 104, 105, 106,
+        107, 108, 108, 96, 94, 91, 93, 95, 97, 99, 100, 102, 104, 105, 106, 107,
+        108, 109, 109, 99, 97, 95, 96, 97, 99, 101, 102, 104, 105, 106, 107,
+        108, 109, 109, 109, 99, 97, 95, 96, 97, 99, 101, 102, 104, 105, 106,
+        107, 108, 109, 109, 109,
+        /* Size 32 */
+        64, 61, 59, 56, 54, 55, 55, 56, 57, 59, 61, 64, 66, 69, 71, 74, 77, 79,
+        81, 84, 86, 88, 90, 92, 94, 95, 96, 98, 99, 99, 99, 99, 61, 60, 58, 57,
+        55, 56, 56, 56, 57, 59, 61, 63, 66, 68, 70, 73, 76, 78, 80, 82, 85, 87,
+        88, 90, 92, 94, 95, 97, 98, 98, 98, 98, 59, 58, 57, 57, 56, 56, 57, 57,
+        57, 59, 61, 63, 65, 67, 69, 72, 74, 76, 79, 81, 83, 85, 87, 89, 91, 92,
+        94, 95, 97, 97, 97, 97, 56, 57, 57, 57, 58, 57, 57, 57, 57, 59, 60, 62,
+        64, 66, 68, 70, 73, 75, 77, 79, 82, 84, 86, 88, 90, 91, 93, 94, 96, 96,
+        96, 96, 54, 55, 56, 58, 59, 58, 58, 58, 57, 59, 60, 61, 63, 65, 67, 69,
+        72, 74, 76, 78, 81, 82, 84, 86, 88, 90, 91, 93, 95, 95, 95, 95, 55, 56,
+        56, 57, 58, 59, 59, 59, 60, 61, 63, 64, 65, 67, 69, 71, 74, 76, 78, 80,
+        82, 84, 86, 87, 89, 91, 92, 94, 95, 95, 95, 95, 55, 56, 57, 57, 58, 59,
+        60, 61, 63, 64, 65, 67, 68, 70, 72, 74, 76, 78, 79, 81, 83, 85, 87, 89,
+        90, 92, 93, 95, 96, 96, 96, 96, 56, 56, 57, 57, 58, 59, 61, 64, 66, 67,
+        68, 70, 71, 73, 75, 76, 78, 80, 81, 83, 85, 86, 88, 90, 91, 93, 94, 95,
+        97, 97, 97, 97, 57, 57, 57, 57, 57, 60, 63, 66, 69, 71, 72, 73, 75, 76,
+        78, 79, 80, 82, 83, 85, 87, 88, 89, 91, 92, 94, 95, 96, 97, 97, 97, 97,
+        59, 59, 59, 59, 59, 61, 64, 67, 71, 72, 74, 75, 77, 78, 79, 81, 82, 84,
+        85, 87, 88, 89, 91, 92, 93, 95, 96, 97, 98, 98, 98, 98, 61, 61, 61, 60,
+        60, 63, 65, 68, 72, 74, 75, 77, 79, 80, 82, 83, 84, 86, 87, 88, 90, 91,
+        92, 93, 95, 96, 97, 98, 99, 99, 99, 99, 64, 63, 63, 62, 61, 64, 67, 70,
+        73, 75, 77, 79, 81, 82, 84, 85, 86, 88, 89, 90, 91, 92, 93, 95, 96, 97,
+        98, 99, 100, 100, 100, 100, 66, 66, 65, 64, 63, 65, 68, 71, 75, 77, 79,
+        81, 83, 85, 86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
+        101, 101, 101, 101, 69, 68, 67, 66, 65, 67, 70, 73, 76, 78, 80, 82, 85,
+        86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 100, 101, 101,
+        101, 101, 71, 70, 69, 68, 67, 69, 72, 75, 78, 79, 82, 84, 86, 87, 89,
+        90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 100, 101, 102, 102, 102,
+        102, 74, 73, 72, 70, 69, 71, 74, 76, 79, 81, 83, 85, 87, 89, 90, 91, 93,
+        94, 95, 96, 97, 98, 98, 99, 100, 101, 101, 102, 103, 103, 103, 103, 77,
+        76, 74, 73, 72, 74, 76, 78, 80, 82, 84, 86, 89, 90, 91, 93, 94, 95, 96,
+        97, 98, 99, 100, 100, 101, 102, 102, 103, 104, 104, 104, 104, 79, 78,
+        76, 75, 74, 76, 78, 80, 82, 84, 86, 88, 90, 91, 92, 94, 95, 96, 97, 98,
+        99, 100, 100, 101, 102, 102, 103, 104, 104, 104, 104, 104, 81, 80, 79,
+        77, 76, 78, 79, 81, 83, 85, 87, 89, 91, 92, 93, 95, 96, 97, 98, 99, 100,
+        101, 101, 102, 103, 103, 104, 104, 105, 105, 105, 105, 84, 82, 81, 79,
+        78, 80, 81, 83, 85, 87, 88, 90, 92, 93, 94, 96, 97, 98, 99, 100, 101,
+        101, 102, 103, 103, 104, 104, 105, 105, 105, 105, 105, 86, 85, 83, 82,
+        81, 82, 83, 85, 87, 88, 90, 91, 93, 94, 95, 97, 98, 99, 100, 101, 102,
+        102, 103, 103, 104, 105, 105, 106, 106, 106, 106, 106, 88, 87, 85, 84,
+        82, 84, 85, 86, 88, 89, 91, 92, 94, 95, 96, 98, 99, 100, 101, 101, 102,
+        103, 103, 104, 105, 105, 106, 106, 107, 107, 107, 107, 90, 88, 87, 86,
+        84, 86, 87, 88, 89, 91, 92, 93, 95, 96, 97, 98, 100, 100, 101, 102, 103,
+        103, 104, 105, 105, 106, 106, 107, 107, 107, 107, 107, 92, 90, 89, 88,
+        86, 87, 89, 90, 91, 92, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 103,
+        104, 105, 105, 106, 106, 107, 107, 107, 107, 107, 107, 94, 92, 91, 90,
+        88, 89, 90, 91, 92, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 103,
+        104, 105, 105, 106, 106, 107, 107, 108, 108, 108, 108, 108, 95, 94, 92,
+        91, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 102, 103,
+        104, 105, 105, 106, 106, 107, 107, 107, 108, 108, 108, 108, 108, 96, 95,
+        94, 93, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 100, 101, 102, 103,
+        104, 104, 105, 106, 106, 107, 107, 107, 108, 108, 109, 109, 109, 109,
+        98, 97, 95, 94, 93, 94, 95, 95, 96, 97, 98, 99, 100, 100, 101, 102, 103,
+        104, 104, 105, 106, 106, 107, 107, 108, 108, 108, 109, 109, 109, 109,
+        109, 99, 98, 97, 96, 95, 95, 96, 97, 97, 98, 99, 100, 101, 101, 102,
+        103, 104, 104, 105, 105, 106, 107, 107, 107, 108, 108, 109, 109, 109,
+        109, 109, 109, 99, 98, 97, 96, 95, 95, 96, 97, 97, 98, 99, 100, 101,
+        101, 102, 103, 104, 104, 105, 105, 106, 107, 107, 107, 108, 108, 109,
+        109, 109, 109, 109, 109, 99, 98, 97, 96, 95, 95, 96, 97, 97, 98, 99,
+        100, 101, 101, 102, 103, 104, 104, 105, 105, 106, 107, 107, 107, 108,
+        108, 109, 109, 109, 109, 109, 109, 99, 98, 97, 96, 95, 95, 96, 97, 97,
+        98, 99, 100, 101, 101, 102, 103, 104, 104, 105, 105, 106, 107, 107, 107,
+        108, 108, 109, 109, 109, 109, 109, 109 },
+      { /* Intra matrices */
+        /* Size 4 */
+        41, 43, 57, 69, 43, 54, 63, 71, 57, 63, 72, 77, 69, 71, 77, 80,
+        /* Size 8 */
+        46, 38, 40, 47, 55, 63, 68, 73, 38, 42, 41, 45, 51, 58, 64, 69, 40, 41,
+        50, 54, 58, 63, 67, 71, 47, 45, 54, 60, 64, 68, 71, 74, 55, 51, 58, 64,
+        69, 72, 74, 76, 63, 58, 63, 68, 72, 75, 77, 78, 68, 64, 67, 71, 74, 77,
+        78, 80, 73, 69, 71, 74, 76, 78, 80, 81,
+        /* Size 16 */
+        45, 41, 38, 38, 39, 43, 47, 50, 54, 58, 61, 64, 67, 69, 71, 71, 41, 40,
+        39, 39, 40, 42, 45, 49, 52, 56, 59, 62, 65, 67, 70, 70, 38, 39, 41, 40,
+        40, 42, 44, 47, 50, 54, 57, 60, 63, 65, 68, 68, 38, 39, 40, 42, 44, 46,
+        48, 51, 54, 56, 59, 62, 65, 67, 69, 69, 39, 40, 40, 44, 49, 51, 53, 55,
+        57, 59, 62, 64, 66, 68, 70, 70, 43, 42, 42, 46, 51, 53, 56, 58, 60, 62,
+        64, 66, 68, 69, 71, 71, 47, 45, 44, 48, 53, 56, 59, 61, 63, 65, 67, 68,
+        70, 71, 72, 72, 50, 49, 47, 51, 55, 58, 61, 63, 65, 67, 69, 70, 71, 72,
+        74, 74, 54, 52, 50, 54, 57, 60, 63, 65, 68, 69, 71, 72, 73, 74, 75, 75,
+        58, 56, 54, 56, 59, 62, 65, 67, 69, 70, 72, 73, 74, 75, 76, 76, 61, 59,
+        57, 59, 62, 64, 67, 69, 71, 72, 73, 74, 75, 76, 77, 77, 64, 62, 60, 62,
+        64, 66, 68, 70, 72, 73, 74, 75, 76, 77, 78, 78, 67, 65, 63, 65, 66, 68,
+        70, 71, 73, 74, 75, 76, 77, 78, 78, 78, 69, 67, 65, 67, 68, 69, 71, 72,
+        74, 75, 76, 77, 78, 78, 79, 79, 71, 70, 68, 69, 70, 71, 72, 74, 75, 76,
+        77, 78, 78, 79, 79, 79, 71, 70, 68, 69, 70, 71, 72, 74, 75, 76, 77, 78,
+        78, 79, 79, 79,
+        /* Size 32 */
+        44, 42, 40, 39, 37, 38, 38, 39, 39, 41, 42, 44, 46, 48, 50, 52, 54, 56,
+        57, 59, 61, 62, 64, 65, 66, 68, 69, 70, 71, 71, 71, 71, 42, 41, 40, 39,
+        38, 38, 39, 39, 39, 41, 42, 44, 45, 47, 49, 51, 53, 54, 56, 58, 60, 61,
+        62, 64, 65, 67, 68, 69, 70, 70, 70, 70, 40, 40, 40, 39, 39, 39, 39, 39,
+        39, 41, 42, 43, 45, 46, 48, 50, 52, 53, 55, 57, 59, 60, 61, 63, 64, 66,
+        67, 68, 69, 69, 69, 69, 39, 39, 39, 39, 40, 40, 40, 39, 39, 40, 42, 43,
+        44, 46, 47, 49, 51, 52, 54, 56, 58, 59, 60, 62, 63, 65, 66, 67, 68, 68,
+        68, 68, 37, 38, 39, 40, 41, 40, 40, 40, 39, 40, 41, 42, 44, 45, 47, 48,
+        50, 51, 53, 55, 57, 58, 59, 61, 63, 64, 65, 66, 67, 67, 67, 67, 38, 38,
+        39, 40, 40, 41, 41, 41, 41, 42, 43, 44, 45, 47, 48, 50, 51, 53, 54, 56,
+        58, 59, 60, 62, 63, 64, 65, 67, 68, 68, 68, 68, 38, 39, 39, 40, 40, 41,
+        42, 43, 43, 44, 45, 46, 48, 49, 50, 52, 53, 54, 56, 57, 59, 60, 61, 63,
+        64, 65, 66, 67, 68, 68, 68, 68, 39, 39, 39, 39, 40, 41, 43, 44, 46, 47,
+        48, 49, 50, 51, 52, 53, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68,
+        69, 69, 69, 69, 39, 39, 39, 39, 39, 41, 43, 46, 48, 49, 50, 51, 52, 53,
+        54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 69, 69, 69,
+        41, 41, 41, 40, 40, 42, 44, 47, 49, 50, 51, 53, 54, 55, 56, 57, 58, 59,
+        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 70, 70, 70, 42, 42, 42, 42,
+        41, 43, 45, 48, 50, 51, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
+        65, 66, 67, 68, 69, 70, 71, 71, 71, 71, 44, 44, 43, 43, 42, 44, 46, 49,
+        51, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 68, 69,
+        70, 70, 71, 71, 71, 71, 46, 45, 45, 44, 44, 45, 48, 50, 52, 54, 55, 57,
+        59, 60, 61, 62, 63, 63, 64, 65, 66, 67, 67, 68, 69, 70, 70, 71, 72, 72,
+        72, 72, 48, 47, 46, 46, 45, 47, 49, 51, 53, 55, 56, 58, 60, 61, 62, 63,
+        64, 64, 65, 66, 67, 68, 68, 69, 70, 70, 71, 72, 72, 72, 72, 72, 50, 49,
+        48, 47, 47, 48, 50, 52, 54, 56, 57, 59, 61, 62, 63, 64, 65, 66, 66, 67,
+        68, 69, 69, 70, 71, 71, 72, 72, 73, 73, 73, 73, 52, 51, 50, 49, 48, 50,
+        52, 53, 55, 57, 58, 60, 62, 63, 64, 65, 66, 67, 67, 68, 69, 69, 70, 71,
+        71, 72, 72, 73, 74, 74, 74, 74, 54, 53, 52, 51, 50, 51, 53, 55, 56, 58,
+        59, 61, 63, 64, 65, 66, 67, 68, 68, 69, 70, 70, 71, 72, 72, 73, 73, 74,
+        74, 74, 74, 74, 56, 54, 53, 52, 51, 53, 54, 56, 58, 59, 60, 62, 63, 64,
+        66, 67, 68, 68, 69, 70, 71, 71, 72, 72, 73, 73, 74, 74, 75, 75, 75, 75,
+        57, 56, 55, 54, 53, 54, 56, 57, 59, 60, 61, 63, 64, 65, 66, 67, 68, 69,
+        70, 71, 71, 72, 72, 73, 73, 74, 74, 75, 75, 75, 75, 75, 59, 58, 57, 56,
+        55, 56, 57, 59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 71, 72, 72,
+        73, 73, 74, 74, 75, 75, 76, 76, 76, 76, 61, 60, 59, 58, 57, 58, 59, 60,
+        61, 62, 63, 65, 66, 67, 68, 69, 70, 71, 71, 72, 73, 73, 74, 74, 75, 75,
+        75, 76, 76, 76, 76, 76, 62, 61, 60, 59, 58, 59, 60, 61, 62, 63, 64, 66,
+        67, 68, 69, 69, 70, 71, 72, 72, 73, 74, 74, 75, 75, 75, 76, 76, 76, 76,
+        76, 76, 64, 62, 61, 60, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
+        71, 72, 72, 73, 74, 74, 75, 75, 75, 76, 76, 76, 77, 77, 77, 77, 65, 64,
+        63, 62, 61, 62, 63, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 72, 73, 73,
+        74, 75, 75, 75, 76, 76, 77, 77, 77, 77, 77, 77, 66, 65, 64, 63, 63, 63,
+        64, 65, 65, 66, 67, 68, 69, 70, 71, 71, 72, 73, 73, 74, 75, 75, 75, 76,
+        76, 77, 77, 77, 78, 78, 78, 78, 68, 67, 66, 65, 64, 64, 65, 66, 66, 67,
+        68, 69, 70, 70, 71, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 77, 78,
+        78, 78, 78, 78, 69, 68, 67, 66, 65, 65, 66, 67, 67, 68, 69, 70, 70, 71,
+        72, 72, 73, 74, 74, 75, 75, 76, 76, 77, 77, 77, 77, 78, 78, 78, 78, 78,
+        70, 69, 68, 67, 66, 67, 67, 68, 68, 69, 70, 70, 71, 72, 72, 73, 74, 74,
+        75, 75, 76, 76, 76, 77, 77, 78, 78, 78, 78, 78, 78, 78, 71, 70, 69, 68,
+        67, 68, 68, 69, 69, 70, 71, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 76,
+        77, 77, 78, 78, 78, 78, 79, 79, 79, 79, 71, 70, 69, 68, 67, 68, 68, 69,
+        69, 70, 71, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 76, 77, 77, 78, 78,
+        78, 78, 79, 79, 79, 79, 71, 70, 69, 68, 67, 68, 68, 69, 69, 70, 71, 71,
+        72, 72, 73, 74, 74, 75, 75, 76, 76, 76, 77, 77, 78, 78, 78, 78, 79, 79,
+        79, 79, 71, 70, 69, 68, 67, 68, 68, 69, 69, 70, 71, 71, 72, 72, 73, 74,
+        74, 75, 75, 76, 76, 76, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 83, 86, 96, 83, 90, 93, 98, 86, 93, 103, 108, 96, 98, 108, 114,
+        /* Size 8 */
+        64, 56, 79, 81, 84, 88, 93, 97, 56, 70, 80, 76, 77, 81, 85, 90, 79, 80,
+        86, 84, 84, 86, 90, 93, 81, 76, 84, 89, 91, 93, 95, 98, 84, 77, 84, 91,
+        95, 98, 100, 102, 88, 81, 86, 93, 98, 101, 104, 106, 93, 85, 90, 95,
+        100, 104, 106, 108, 97, 90, 93, 98, 102, 106, 108, 111,
+        /* Size 16 */
+        64, 60, 56, 66, 79, 80, 81, 82, 84, 86, 88, 90, 93, 95, 97, 97, 60, 61,
+        62, 70, 79, 79, 78, 79, 80, 82, 84, 86, 89, 91, 94, 94, 56, 62, 70, 74,
+        80, 78, 76, 76, 77, 79, 81, 83, 85, 88, 90, 90, 66, 70, 74, 78, 83, 81,
+        80, 80, 80, 82, 83, 85, 87, 89, 92, 92, 79, 79, 80, 83, 86, 85, 84, 84,
+        84, 85, 86, 88, 90, 91, 93, 93, 80, 79, 78, 81, 85, 86, 87, 87, 88, 88,
+        89, 91, 92, 94, 95, 95, 81, 78, 76, 80, 84, 87, 89, 90, 91, 92, 93, 94,
+        95, 96, 98, 98, 82, 79, 76, 80, 84, 87, 90, 92, 93, 94, 95, 96, 97, 98,
+        100, 100, 84, 80, 77, 80, 84, 88, 91, 93, 95, 96, 98, 99, 100, 101, 102,
+        102, 86, 82, 79, 82, 85, 88, 92, 94, 96, 98, 99, 101, 102, 103, 104,
+        104, 88, 84, 81, 83, 86, 89, 93, 95, 98, 99, 101, 102, 104, 105, 106,
+        106, 90, 86, 83, 85, 88, 91, 94, 96, 99, 101, 102, 104, 105, 106, 107,
+        107, 93, 89, 85, 87, 90, 92, 95, 97, 100, 102, 104, 105, 106, 107, 108,
+        108, 95, 91, 88, 89, 91, 94, 96, 98, 101, 103, 105, 106, 107, 108, 109,
+        109, 97, 94, 90, 92, 93, 95, 98, 100, 102, 104, 106, 107, 108, 109, 111,
+        111, 97, 94, 90, 92, 93, 95, 98, 100, 102, 104, 106, 107, 108, 109, 111,
+        111,
+        /* Size 32 */
+        64, 62, 60, 58, 56, 60, 66, 72, 79, 79, 80, 81, 81, 82, 82, 83, 84, 85,
+        86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 97, 97, 97, 62, 61, 60, 60,
+        59, 63, 68, 73, 79, 79, 79, 80, 80, 80, 81, 81, 82, 83, 84, 85, 86, 87,
+        88, 90, 91, 92, 93, 94, 95, 95, 95, 95, 60, 60, 61, 62, 62, 66, 70, 74,
+        79, 79, 79, 79, 78, 79, 79, 80, 80, 81, 82, 83, 84, 85, 86, 88, 89, 90,
+        91, 92, 94, 94, 94, 94, 58, 60, 62, 64, 66, 69, 72, 75, 79, 79, 78, 78,
+        77, 77, 78, 78, 78, 79, 80, 81, 82, 83, 85, 86, 87, 88, 89, 91, 92, 92,
+        92, 92, 56, 59, 62, 66, 70, 72, 74, 77, 80, 79, 78, 77, 76, 76, 76, 77,
+        77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 88, 89, 90, 90, 90, 90, 60, 63,
+        66, 69, 72, 74, 76, 79, 81, 80, 79, 78, 78, 78, 78, 78, 79, 79, 80, 81,
+        82, 83, 84, 85, 86, 87, 89, 90, 91, 91, 91, 91, 66, 68, 70, 72, 74, 76,
+        78, 80, 83, 82, 81, 80, 80, 80, 80, 80, 80, 81, 82, 83, 83, 84, 85, 86,
+        87, 88, 89, 91, 92, 92, 92, 92, 72, 73, 74, 75, 77, 79, 80, 82, 84, 84,
+        83, 83, 82, 82, 82, 82, 82, 83, 83, 84, 85, 86, 87, 87, 88, 89, 90, 91,
+        92, 92, 92, 92, 79, 79, 79, 79, 80, 81, 83, 84, 86, 85, 85, 85, 84, 84,
+        84, 84, 84, 85, 85, 86, 86, 87, 88, 89, 90, 90, 91, 92, 93, 93, 93, 93,
+        79, 79, 79, 79, 79, 80, 82, 84, 85, 85, 85, 85, 86, 86, 86, 86, 86, 86,
+        87, 87, 88, 88, 89, 90, 91, 92, 93, 93, 94, 94, 94, 94, 80, 79, 79, 78,
+        78, 79, 81, 83, 85, 85, 86, 86, 87, 87, 87, 87, 88, 88, 88, 89, 89, 90,
+        91, 91, 92, 93, 94, 95, 95, 95, 95, 95, 81, 80, 79, 78, 77, 78, 80, 83,
+        85, 85, 86, 87, 88, 88, 89, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94,
+        95, 96, 96, 96, 96, 96, 81, 80, 78, 77, 76, 78, 80, 82, 84, 86, 87, 88,
+        89, 90, 90, 91, 91, 91, 92, 92, 93, 93, 94, 94, 95, 96, 96, 97, 98, 98,
+        98, 98, 82, 80, 79, 77, 76, 78, 80, 82, 84, 86, 87, 88, 90, 90, 91, 91,
+        92, 92, 93, 93, 94, 94, 95, 95, 96, 97, 97, 98, 99, 99, 99, 99, 82, 81,
+        79, 78, 76, 78, 80, 82, 84, 86, 87, 89, 90, 91, 92, 92, 93, 94, 94, 95,
+        95, 96, 96, 97, 97, 98, 98, 99, 100, 100, 100, 100, 83, 81, 80, 78, 77,
+        78, 80, 82, 84, 86, 87, 89, 91, 91, 92, 93, 94, 95, 95, 96, 96, 97, 97,
+        98, 99, 99, 100, 100, 101, 101, 101, 101, 84, 82, 80, 78, 77, 79, 80,
+        82, 84, 86, 88, 89, 91, 92, 93, 94, 95, 96, 96, 97, 98, 98, 99, 99, 100,
+        100, 101, 101, 102, 102, 102, 102, 85, 83, 81, 79, 78, 79, 81, 83, 85,
+        86, 88, 90, 91, 92, 94, 95, 96, 96, 97, 98, 99, 99, 100, 100, 101, 101,
+        102, 102, 103, 103, 103, 103, 86, 84, 82, 80, 79, 80, 82, 83, 85, 87,
+        88, 90, 92, 93, 94, 95, 96, 97, 98, 99, 99, 100, 101, 101, 102, 102,
+        103, 103, 104, 104, 104, 104, 87, 85, 83, 81, 80, 81, 83, 84, 86, 87,
+        89, 91, 92, 93, 95, 96, 97, 98, 99, 99, 100, 101, 101, 102, 103, 103,
+        104, 104, 105, 105, 105, 105, 88, 86, 84, 82, 81, 82, 83, 85, 86, 88,
+        89, 91, 93, 94, 95, 96, 98, 99, 99, 100, 101, 102, 102, 103, 104, 104,
+        105, 105, 106, 106, 106, 106, 89, 87, 85, 83, 82, 83, 84, 86, 87, 88,
+        90, 92, 93, 94, 96, 97, 98, 99, 100, 101, 102, 102, 103, 104, 104, 105,
+        105, 106, 106, 106, 106, 106, 90, 88, 86, 85, 83, 84, 85, 87, 88, 89,
+        91, 92, 94, 95, 96, 97, 99, 100, 101, 101, 102, 103, 104, 104, 105, 105,
+        106, 106, 107, 107, 107, 107, 92, 90, 88, 86, 84, 85, 86, 87, 89, 90,
+        91, 93, 94, 95, 97, 98, 99, 100, 101, 102, 103, 104, 104, 105, 106, 106,
+        107, 107, 108, 108, 108, 108, 93, 91, 89, 87, 85, 86, 87, 88, 90, 91,
+        92, 93, 95, 96, 97, 99, 100, 101, 102, 103, 104, 104, 105, 106, 106,
+        107, 107, 108, 108, 108, 108, 108, 94, 92, 90, 88, 86, 87, 88, 89, 90,
+        92, 93, 94, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 105, 106, 107,
+        107, 108, 108, 109, 109, 109, 109, 95, 93, 91, 89, 88, 89, 89, 90, 91,
+        93, 94, 95, 96, 97, 98, 100, 101, 102, 103, 104, 105, 105, 106, 107,
+        107, 108, 108, 109, 109, 109, 109, 109, 96, 94, 92, 91, 89, 90, 91, 91,
+        92, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+        108, 108, 109, 109, 110, 110, 110, 110, 97, 95, 94, 92, 90, 91, 92, 92,
+        93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+        108, 108, 109, 109, 110, 111, 111, 111, 111, 97, 95, 94, 92, 90, 91, 92,
+        92, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+        108, 108, 109, 109, 110, 111, 111, 111, 111, 97, 95, 94, 92, 90, 91, 92,
+        92, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+        108, 108, 109, 109, 110, 111, 111, 111, 111, 97, 95, 94, 92, 90, 91, 92,
+        92, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+        108, 108, 109, 109, 110, 111, 111, 111, 111 },
+      { /* Intra matrices */
+        /* Size 4 */
+        42, 56, 58, 65, 56, 61, 63, 67, 58, 63, 70, 74, 65, 67, 74, 78,
+        /* Size 8 */
+        45, 39, 55, 57, 59, 62, 66, 69, 39, 49, 56, 53, 54, 57, 60, 64, 55, 56,
+        61, 60, 60, 61, 63, 66, 57, 53, 60, 63, 65, 66, 68, 70, 59, 54, 60, 65,
+        68, 70, 71, 73, 62, 57, 61, 66, 70, 72, 74, 76, 66, 60, 63, 68, 71, 74,
+        76, 78, 69, 64, 66, 70, 73, 76, 78, 80,
+        /* Size 16 */
+        44, 41, 38, 45, 55, 56, 56, 57, 58, 60, 62, 63, 65, 67, 69, 69, 41, 42,
+        43, 48, 55, 55, 54, 55, 56, 57, 59, 60, 62, 64, 66, 66, 38, 43, 48, 52,
+        55, 54, 52, 53, 53, 55, 56, 58, 59, 61, 63, 63, 45, 48, 52, 54, 57, 56,
+        55, 56, 56, 57, 58, 60, 61, 63, 64, 64, 55, 55, 55, 57, 60, 59, 59, 59,
+        59, 59, 60, 61, 63, 64, 66, 66, 56, 55, 54, 56, 59, 60, 61, 61, 61, 62,
+        63, 64, 65, 66, 67, 67, 56, 54, 52, 55, 59, 61, 62, 63, 64, 64, 65, 66,
+        67, 68, 69, 69, 57, 55, 53, 56, 59, 61, 63, 64, 65, 66, 67, 68, 69, 69,
+        70, 70, 58, 56, 53, 56, 59, 61, 64, 65, 67, 68, 69, 70, 70, 71, 72, 72,
+        60, 57, 55, 57, 59, 62, 64, 66, 68, 69, 70, 71, 72, 73, 73, 73, 62, 59,
+        56, 58, 60, 63, 65, 67, 69, 70, 71, 72, 73, 74, 75, 75, 63, 60, 58, 60,
+        61, 64, 66, 68, 70, 71, 72, 73, 74, 75, 76, 76, 65, 62, 59, 61, 63, 65,
+        67, 69, 70, 72, 73, 74, 75, 76, 77, 77, 67, 64, 61, 63, 64, 66, 68, 69,
+        71, 73, 74, 75, 76, 77, 78, 78, 69, 66, 63, 64, 66, 67, 69, 70, 72, 73,
+        75, 76, 77, 78, 79, 79, 69, 66, 63, 64, 66, 67, 69, 70, 72, 73, 75, 76,
+        77, 78, 79, 79,
+        /* Size 32 */
+        44, 42, 41, 39, 38, 41, 45, 49, 54, 55, 55, 56, 56, 57, 57, 58, 58, 59,
+        60, 60, 61, 62, 63, 64, 65, 66, 66, 67, 68, 68, 68, 68, 42, 42, 41, 41,
+        40, 43, 46, 50, 55, 55, 55, 55, 55, 55, 56, 56, 57, 57, 58, 59, 60, 61,
+        61, 62, 63, 64, 65, 66, 67, 67, 67, 67, 41, 41, 42, 42, 42, 45, 48, 51,
+        55, 55, 54, 54, 54, 54, 55, 55, 55, 56, 57, 58, 58, 59, 60, 61, 62, 63,
+        63, 64, 65, 65, 65, 65, 39, 41, 42, 43, 45, 47, 49, 52, 55, 54, 54, 53,
+        53, 53, 54, 54, 54, 55, 55, 56, 57, 58, 59, 59, 60, 61, 62, 63, 64, 64,
+        64, 64, 38, 40, 42, 45, 48, 49, 51, 53, 55, 54, 53, 53, 52, 52, 53, 53,
+        53, 54, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 63, 63, 63, 41, 43,
+        45, 47, 49, 51, 53, 54, 56, 55, 55, 54, 54, 54, 54, 54, 54, 55, 55, 56,
+        57, 57, 58, 59, 60, 61, 62, 62, 63, 63, 63, 63, 45, 46, 48, 49, 51, 53,
+        54, 56, 57, 57, 56, 56, 55, 55, 55, 55, 56, 56, 57, 57, 58, 58, 59, 60,
+        61, 61, 62, 63, 64, 64, 64, 64, 49, 50, 51, 52, 53, 54, 56, 57, 58, 58,
+        57, 57, 57, 57, 57, 57, 57, 57, 58, 58, 59, 59, 60, 61, 61, 62, 63, 64,
+        64, 64, 64, 64, 54, 55, 55, 55, 55, 56, 57, 58, 59, 59, 59, 59, 58, 58,
+        58, 58, 58, 59, 59, 59, 60, 60, 61, 62, 62, 63, 64, 64, 65, 65, 65, 65,
+        55, 55, 55, 54, 54, 55, 57, 58, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60,
+        60, 61, 61, 62, 62, 63, 63, 64, 65, 65, 66, 66, 66, 66, 55, 55, 54, 54,
+        53, 55, 56, 57, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 63,
+        63, 64, 64, 65, 65, 66, 67, 67, 67, 67, 56, 55, 54, 53, 53, 54, 56, 57,
+        59, 59, 60, 60, 61, 61, 62, 62, 62, 62, 63, 63, 63, 64, 64, 65, 65, 66,
+        66, 67, 67, 67, 67, 67, 56, 55, 54, 53, 52, 54, 55, 57, 58, 59, 60, 61,
+        62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 66, 66, 67, 67, 68, 68, 68,
+        68, 68, 57, 55, 54, 53, 52, 54, 55, 57, 58, 59, 60, 61, 62, 63, 63, 64,
+        64, 64, 65, 65, 66, 66, 66, 67, 67, 68, 68, 69, 69, 69, 69, 69, 57, 56,
+        55, 54, 53, 54, 55, 57, 58, 59, 60, 62, 63, 63, 64, 64, 65, 65, 66, 66,
+        66, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 70, 58, 56, 55, 54, 53, 54,
+        55, 57, 58, 59, 61, 62, 63, 64, 64, 65, 66, 66, 67, 67, 67, 68, 68, 69,
+        69, 69, 70, 70, 71, 71, 71, 71, 58, 57, 55, 54, 53, 54, 56, 57, 58, 60,
+        61, 62, 63, 64, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 71, 71,
+        72, 72, 72, 72, 59, 57, 56, 55, 54, 55, 56, 57, 59, 60, 61, 62, 64, 64,
+        65, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 72, 72,
+        60, 58, 57, 55, 54, 55, 57, 58, 59, 60, 61, 63, 64, 65, 66, 67, 67, 68,
+        69, 69, 70, 70, 71, 71, 71, 72, 72, 73, 73, 73, 73, 73, 60, 59, 58, 56,
+        55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 68, 69, 70, 70, 71,
+        71, 72, 72, 72, 73, 73, 74, 74, 74, 74, 61, 60, 58, 57, 56, 57, 58, 59,
+        60, 61, 62, 63, 65, 66, 66, 67, 68, 69, 70, 70, 71, 71, 72, 72, 73, 73,
+        74, 74, 74, 74, 74, 74, 62, 61, 59, 58, 56, 57, 58, 59, 60, 62, 63, 64,
+        65, 66, 67, 68, 69, 69, 70, 71, 71, 72, 72, 73, 73, 74, 74, 74, 75, 75,
+        75, 75, 63, 61, 60, 59, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
+        69, 70, 71, 71, 72, 72, 73, 73, 74, 74, 75, 75, 75, 75, 75, 75, 64, 62,
+        61, 59, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 70, 71, 72,
+        72, 73, 73, 74, 74, 75, 75, 76, 76, 76, 76, 76, 65, 63, 62, 60, 59, 60,
+        61, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 71, 72, 73, 73, 74, 74,
+        75, 75, 76, 76, 76, 76, 76, 76, 66, 64, 63, 61, 60, 61, 61, 62, 63, 64,
+        65, 66, 67, 68, 69, 69, 70, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 76,
+        77, 77, 77, 77, 66, 65, 63, 62, 61, 62, 62, 63, 64, 65, 65, 66, 67, 68,
+        69, 70, 71, 71, 72, 73, 74, 74, 75, 75, 76, 76, 76, 77, 77, 77, 77, 77,
+        67, 66, 64, 63, 62, 62, 63, 64, 64, 65, 66, 67, 68, 69, 69, 70, 71, 72,
+        73, 73, 74, 74, 75, 76, 76, 76, 77, 77, 78, 78, 78, 78, 68, 67, 65, 64,
+        63, 63, 64, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72, 72, 73, 74, 74, 75,
+        75, 76, 76, 77, 77, 78, 78, 78, 78, 78, 68, 67, 65, 64, 63, 63, 64, 64,
+        65, 66, 67, 67, 68, 69, 70, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 77,
+        77, 78, 78, 78, 78, 78, 68, 67, 65, 64, 63, 63, 64, 64, 65, 66, 67, 67,
+        68, 69, 70, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 78,
+        78, 78, 68, 67, 65, 64, 63, 63, 64, 64, 65, 66, 67, 67, 68, 69, 70, 71,
+        72, 72, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 78, 78, 78 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 67, 84, 97, 67, 81, 91, 99, 84, 91, 100, 105, 97, 99, 105, 108,
+        /* Size 8 */
+        64, 55, 58, 66, 75, 82, 88, 92, 55, 59, 58, 63, 70, 78, 84, 89, 58, 58,
+        68, 73, 78, 82, 87, 91, 66, 63, 73, 80, 84, 87, 90, 93, 75, 70, 78, 84,
+        88, 91, 93, 95, 82, 78, 82, 87, 91, 94, 96, 97, 88, 84, 87, 90, 93, 96,
+        97, 98, 92, 89, 91, 93, 95, 97, 98, 99,
+        /* Size 16 */
+        64, 59, 55, 56, 58, 62, 66, 70, 75, 78, 82, 85, 88, 90, 92, 92, 59, 58,
+        57, 58, 58, 61, 65, 68, 73, 76, 80, 83, 86, 88, 90, 90, 55, 57, 59, 59,
+        58, 60, 63, 67, 70, 74, 78, 81, 84, 86, 89, 89, 56, 58, 59, 61, 63, 65,
+        68, 71, 74, 77, 80, 83, 85, 87, 90, 90, 58, 58, 58, 63, 68, 71, 73, 75,
+        78, 80, 82, 85, 87, 89, 91, 91, 62, 61, 60, 65, 71, 73, 76, 78, 81, 83,
+        85, 87, 89, 90, 92, 92, 66, 65, 63, 68, 73, 76, 80, 82, 84, 86, 87, 89,
+        90, 92, 93, 93, 70, 68, 67, 71, 75, 78, 82, 84, 86, 88, 89, 91, 92, 93,
+        94, 94, 75, 73, 70, 74, 78, 81, 84, 86, 88, 90, 91, 92, 93, 94, 95, 95,
+        78, 76, 74, 77, 80, 83, 86, 88, 90, 91, 92, 93, 94, 95, 96, 96, 82, 80,
+        78, 80, 82, 85, 87, 89, 91, 92, 94, 95, 96, 96, 97, 97, 85, 83, 81, 83,
+        85, 87, 89, 91, 92, 93, 95, 96, 96, 97, 98, 98, 88, 86, 84, 85, 87, 89,
+        90, 92, 93, 94, 96, 96, 97, 98, 98, 98, 90, 88, 86, 87, 89, 90, 92, 93,
+        94, 95, 96, 97, 98, 98, 99, 99, 92, 90, 89, 90, 91, 92, 93, 94, 95, 96,
+        97, 98, 98, 99, 99, 99, 92, 90, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
+        98, 99, 99, 99,
+        /* Size 32 */
+        64, 62, 59, 57, 55, 56, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 75, 77,
+        78, 80, 82, 84, 85, 86, 88, 89, 90, 91, 92, 92, 92, 92, 62, 60, 59, 58,
+        56, 57, 57, 57, 58, 60, 61, 63, 65, 67, 69, 71, 74, 75, 77, 79, 81, 82,
+        84, 85, 87, 88, 89, 90, 91, 91, 91, 91, 59, 59, 58, 58, 57, 57, 58, 58,
+        58, 59, 61, 63, 65, 66, 68, 70, 73, 74, 76, 78, 80, 81, 83, 84, 86, 87,
+        88, 89, 90, 90, 90, 90, 57, 58, 58, 58, 58, 58, 58, 58, 58, 59, 61, 62,
+        64, 66, 67, 69, 72, 73, 75, 77, 79, 80, 82, 83, 85, 86, 87, 88, 90, 90,
+        90, 90, 55, 56, 57, 58, 59, 59, 59, 58, 58, 59, 60, 62, 63, 65, 67, 68,
+        70, 72, 74, 76, 78, 79, 81, 82, 84, 85, 86, 87, 89, 89, 89, 89, 56, 57,
+        57, 58, 59, 59, 60, 60, 60, 62, 63, 64, 65, 67, 69, 70, 72, 74, 75, 77,
+        79, 80, 82, 83, 85, 86, 87, 88, 89, 89, 89, 89, 56, 57, 58, 58, 59, 60,
+        61, 62, 63, 64, 65, 66, 68, 69, 71, 72, 74, 75, 77, 78, 80, 81, 83, 84,
+        85, 86, 87, 89, 90, 90, 90, 90, 57, 57, 58, 58, 58, 60, 62, 64, 66, 67,
+        68, 69, 70, 72, 73, 74, 76, 77, 78, 80, 81, 82, 84, 85, 86, 87, 88, 89,
+        90, 90, 90, 90, 58, 58, 58, 58, 58, 60, 63, 66, 68, 70, 71, 72, 73, 74,
+        75, 76, 78, 79, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 91, 91, 91,
+        60, 60, 59, 59, 59, 62, 64, 67, 70, 71, 72, 73, 75, 76, 77, 78, 79, 80,
+        81, 82, 84, 85, 86, 87, 88, 89, 89, 90, 91, 91, 91, 91, 62, 61, 61, 61,
+        60, 63, 65, 68, 71, 72, 73, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86,
+        87, 88, 89, 89, 90, 91, 92, 92, 92, 92, 64, 63, 63, 62, 62, 64, 66, 69,
+        72, 73, 75, 76, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 89, 90,
+        91, 92, 92, 92, 92, 92, 66, 65, 65, 64, 63, 65, 68, 70, 73, 75, 76, 78,
+        80, 81, 82, 83, 84, 85, 86, 87, 87, 88, 89, 90, 90, 91, 92, 92, 93, 93,
+        93, 93, 68, 67, 66, 66, 65, 67, 69, 72, 74, 76, 77, 79, 81, 82, 83, 84,
+        85, 86, 87, 87, 88, 89, 90, 90, 91, 92, 92, 93, 94, 94, 94, 94, 70, 69,
+        68, 67, 67, 69, 71, 73, 75, 77, 78, 80, 82, 83, 84, 85, 86, 87, 88, 88,
+        89, 90, 91, 91, 92, 92, 93, 94, 94, 94, 94, 94, 72, 71, 70, 69, 68, 70,
+        72, 74, 76, 78, 80, 81, 83, 84, 85, 86, 87, 88, 89, 89, 90, 91, 91, 92,
+        93, 93, 94, 94, 95, 95, 95, 95, 75, 74, 73, 72, 70, 72, 74, 76, 78, 79,
+        81, 82, 84, 85, 86, 87, 88, 89, 90, 90, 91, 92, 92, 93, 93, 94, 94, 95,
+        95, 95, 95, 95, 77, 75, 74, 73, 72, 74, 75, 77, 79, 80, 82, 83, 85, 86,
+        87, 88, 89, 90, 90, 91, 92, 92, 93, 93, 94, 94, 95, 95, 96, 96, 96, 96,
+        78, 77, 76, 75, 74, 75, 77, 78, 80, 81, 83, 84, 86, 87, 88, 89, 90, 90,
+        91, 92, 92, 93, 93, 94, 94, 95, 95, 96, 96, 96, 96, 96, 80, 79, 78, 77,
+        76, 77, 78, 80, 81, 82, 84, 85, 87, 87, 88, 89, 90, 91, 92, 92, 93, 94,
+        94, 95, 95, 95, 96, 96, 97, 97, 97, 97, 82, 81, 80, 79, 78, 79, 80, 81,
+        82, 84, 85, 86, 87, 88, 89, 90, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96,
+        96, 97, 97, 97, 97, 97, 84, 82, 81, 80, 79, 80, 81, 82, 83, 85, 86, 87,
+        88, 89, 90, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96, 96, 97, 97, 97, 97,
+        97, 97, 85, 84, 83, 82, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 91,
+        92, 93, 93, 94, 95, 95, 96, 96, 96, 97, 97, 97, 98, 98, 98, 98, 86, 85,
+        84, 83, 82, 83, 84, 85, 86, 87, 88, 89, 90, 90, 91, 92, 93, 93, 94, 95,
+        95, 96, 96, 96, 97, 97, 97, 98, 98, 98, 98, 98, 88, 87, 86, 85, 84, 85,
+        85, 86, 87, 88, 89, 89, 90, 91, 92, 93, 93, 94, 94, 95, 96, 96, 96, 97,
+        97, 97, 98, 98, 98, 98, 98, 98, 89, 88, 87, 86, 85, 86, 86, 87, 88, 89,
+        89, 90, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96, 97, 97, 97, 98, 98, 98,
+        99, 99, 99, 99, 90, 89, 88, 87, 86, 87, 87, 88, 89, 89, 90, 91, 92, 92,
+        93, 94, 94, 95, 95, 96, 96, 97, 97, 97, 98, 98, 98, 99, 99, 99, 99, 99,
+        91, 90, 89, 88, 87, 88, 89, 89, 90, 90, 91, 92, 92, 93, 94, 94, 95, 95,
+        96, 96, 97, 97, 97, 98, 98, 98, 99, 99, 99, 99, 99, 99, 92, 91, 90, 90,
+        89, 89, 90, 90, 91, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96, 97, 97, 97,
+        98, 98, 98, 99, 99, 99, 99, 99, 99, 99, 92, 91, 90, 90, 89, 89, 90, 90,
+        91, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96, 97, 97, 97, 98, 98, 98, 99,
+        99, 99, 99, 99, 99, 99, 92, 91, 90, 90, 89, 89, 90, 90, 91, 91, 92, 92,
+        93, 94, 94, 95, 95, 96, 96, 97, 97, 97, 98, 98, 98, 99, 99, 99, 99, 99,
+        99, 99, 92, 91, 90, 90, 89, 89, 90, 90, 91, 91, 92, 92, 93, 94, 94, 95,
+        95, 96, 96, 97, 97, 97, 98, 98, 98, 99, 99, 99, 99, 99, 99, 99 },
+      { /* Intra matrices */
+        /* Size 4 */
+        44, 46, 58, 68, 46, 56, 64, 70, 58, 64, 71, 74, 68, 70, 74, 77,
+        /* Size 8 */
+        49, 42, 44, 50, 57, 63, 68, 71, 42, 45, 44, 48, 54, 60, 65, 69, 44, 44,
+        52, 56, 60, 63, 67, 70, 50, 48, 56, 61, 65, 68, 70, 72, 57, 54, 60, 65,
+        68, 71, 73, 74, 63, 60, 63, 68, 71, 73, 74, 76, 68, 65, 67, 70, 73, 74,
+        76, 77, 71, 69, 70, 72, 74, 76, 77, 78,
+        /* Size 16 */
+        48, 44, 41, 42, 43, 46, 49, 53, 56, 59, 62, 65, 67, 69, 70, 70, 44, 43,
+        43, 43, 43, 45, 48, 51, 55, 57, 61, 63, 65, 67, 69, 69, 41, 43, 44, 44,
+        43, 45, 47, 50, 53, 56, 59, 61, 64, 66, 68, 68, 42, 43, 44, 45, 47, 49,
+        51, 53, 56, 58, 61, 63, 65, 67, 68, 68, 43, 43, 43, 47, 51, 53, 55, 57,
+        59, 61, 63, 64, 66, 68, 69, 69, 46, 45, 45, 49, 53, 55, 58, 59, 61, 63,
+        64, 66, 68, 69, 70, 70, 49, 48, 47, 51, 55, 58, 61, 62, 64, 65, 67, 68,
+        69, 70, 71, 71, 53, 51, 50, 53, 57, 59, 62, 64, 66, 67, 68, 69, 70, 71,
+        72, 72, 56, 55, 53, 56, 59, 61, 64, 66, 67, 69, 70, 71, 72, 72, 73, 73,
+        59, 57, 56, 58, 61, 63, 65, 67, 69, 70, 71, 72, 72, 73, 74, 74, 62, 61,
+        59, 61, 63, 64, 67, 68, 70, 71, 72, 73, 73, 74, 75, 75, 65, 63, 61, 63,
+        64, 66, 68, 69, 71, 72, 73, 73, 74, 75, 75, 75, 67, 65, 64, 65, 66, 68,
+        69, 70, 72, 72, 73, 74, 75, 75, 76, 76, 69, 67, 66, 67, 68, 69, 70, 71,
+        72, 73, 74, 75, 75, 76, 76, 76, 70, 69, 68, 68, 69, 70, 71, 72, 73, 74,
+        75, 75, 76, 76, 76, 76, 70, 69, 68, 68, 69, 70, 71, 72, 73, 74, 75, 75,
+        76, 76, 76, 76,
+        /* Size 32 */
+        47, 46, 44, 42, 41, 41, 42, 42, 43, 44, 46, 47, 49, 51, 52, 54, 56, 57,
+        59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 70, 70, 70, 46, 44, 43, 42,
+        41, 42, 42, 42, 43, 44, 45, 47, 48, 50, 52, 53, 55, 56, 58, 59, 61, 62,
+        63, 64, 66, 66, 67, 68, 69, 69, 69, 69, 44, 43, 43, 43, 42, 42, 42, 43,
+        43, 44, 45, 46, 48, 49, 51, 52, 54, 56, 57, 58, 60, 61, 62, 64, 65, 66,
+        67, 68, 68, 68, 68, 68, 42, 42, 43, 43, 43, 43, 43, 43, 43, 44, 45, 46,
+        47, 49, 50, 52, 53, 55, 56, 58, 59, 60, 61, 63, 64, 65, 66, 67, 68, 68,
+        68, 68, 41, 41, 42, 43, 44, 44, 43, 43, 43, 44, 45, 46, 47, 48, 49, 51,
+        53, 54, 55, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 67, 67, 67, 41, 42,
+        42, 43, 44, 44, 44, 44, 45, 46, 46, 47, 48, 50, 51, 52, 54, 55, 56, 58,
+        59, 60, 61, 63, 64, 65, 66, 67, 68, 68, 68, 68, 42, 42, 42, 43, 43, 44,
+        45, 46, 47, 47, 48, 49, 50, 51, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63,
+        64, 65, 66, 67, 68, 68, 68, 68, 42, 42, 43, 43, 43, 44, 46, 47, 49, 50,
+        50, 51, 52, 53, 54, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 67,
+        68, 68, 68, 68, 43, 43, 43, 43, 43, 45, 47, 49, 51, 52, 53, 54, 55, 55,
+        56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 68, 69, 69, 69, 69,
+        44, 44, 44, 44, 44, 46, 47, 50, 52, 53, 54, 55, 56, 57, 58, 58, 59, 60,
+        61, 62, 63, 64, 65, 65, 66, 67, 68, 68, 69, 69, 69, 69, 46, 45, 45, 45,
+        45, 46, 48, 50, 53, 54, 55, 56, 57, 58, 59, 60, 61, 61, 62, 63, 64, 65,
+        65, 66, 67, 68, 68, 69, 70, 70, 70, 70, 47, 47, 46, 46, 46, 47, 49, 51,
+        54, 55, 56, 57, 59, 59, 60, 61, 62, 63, 63, 64, 65, 66, 66, 67, 68, 68,
+        69, 70, 70, 70, 70, 70, 49, 48, 48, 47, 47, 48, 50, 52, 55, 56, 57, 59,
+        60, 61, 62, 63, 63, 64, 65, 65, 66, 67, 67, 68, 69, 69, 70, 70, 71, 71,
+        71, 71, 51, 50, 49, 49, 48, 50, 51, 53, 55, 57, 58, 59, 61, 62, 63, 63,
+        64, 65, 65, 66, 67, 67, 68, 69, 69, 70, 70, 71, 71, 71, 71, 71, 52, 52,
+        51, 50, 49, 51, 53, 54, 56, 58, 59, 60, 62, 63, 63, 64, 65, 66, 66, 67,
+        68, 68, 69, 69, 70, 70, 71, 71, 72, 72, 72, 72, 54, 53, 52, 52, 51, 52,
+        54, 56, 57, 58, 60, 61, 63, 63, 64, 65, 66, 67, 67, 68, 68, 69, 69, 70,
+        70, 71, 71, 72, 72, 72, 72, 72, 56, 55, 54, 53, 53, 54, 55, 57, 58, 59,
+        61, 62, 63, 64, 65, 66, 67, 67, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72,
+        72, 72, 72, 72, 57, 56, 56, 55, 54, 55, 56, 58, 59, 60, 61, 63, 64, 65,
+        66, 67, 67, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73,
+        59, 58, 57, 56, 55, 56, 58, 59, 60, 61, 62, 63, 65, 65, 66, 67, 68, 69,
+        69, 70, 70, 71, 71, 71, 72, 72, 73, 73, 73, 73, 73, 73, 60, 59, 58, 58,
+        57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 69, 70, 70, 71, 71,
+        72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 62, 61, 60, 59, 58, 59, 60, 61,
+        62, 63, 64, 65, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73,
+        73, 74, 74, 74, 74, 74, 63, 62, 61, 60, 59, 60, 61, 62, 63, 64, 65, 66,
+        67, 67, 68, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 74,
+        74, 74, 64, 63, 62, 61, 61, 61, 62, 63, 64, 65, 65, 66, 67, 68, 69, 69,
+        70, 71, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75, 65, 64,
+        64, 63, 62, 63, 63, 64, 65, 65, 66, 67, 68, 69, 69, 70, 71, 71, 71, 72,
+        72, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 66, 66, 65, 64, 63, 64,
+        64, 65, 66, 66, 67, 68, 69, 69, 70, 70, 71, 71, 72, 72, 73, 73, 73, 74,
+        74, 74, 75, 75, 75, 75, 75, 75, 67, 66, 66, 65, 64, 65, 65, 66, 66, 67,
+        68, 68, 69, 70, 70, 71, 71, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75,
+        75, 75, 75, 75, 68, 67, 67, 66, 65, 66, 66, 67, 67, 68, 68, 69, 70, 70,
+        71, 71, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75,
+        69, 68, 68, 67, 66, 67, 67, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 72,
+        73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 76, 76, 76, 76, 70, 69, 68, 68,
+        67, 68, 68, 68, 69, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 74, 74, 74,
+        75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 70, 69, 68, 68, 67, 68, 68, 68,
+        69, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 74, 74, 74, 75, 75, 75, 75,
+        75, 76, 76, 76, 76, 76, 70, 69, 68, 68, 67, 68, 68, 68, 69, 69, 70, 70,
+        71, 71, 72, 72, 72, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 76, 76, 76,
+        76, 76, 70, 69, 68, 68, 67, 68, 68, 68, 69, 69, 70, 70, 71, 71, 72, 72,
+        72, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 80, 83, 90, 80, 86, 88, 92, 83, 88, 96, 99, 90, 92, 99, 104,
+        /* Size 8 */
+        64, 57, 77, 78, 80, 84, 88, 91, 57, 69, 77, 74, 75, 78, 82, 86, 77, 77,
+        82, 81, 81, 82, 85, 88, 78, 74, 81, 85, 86, 87, 89, 91, 80, 75, 81, 86,
+        89, 91, 93, 95, 84, 78, 82, 87, 91, 94, 96, 97, 88, 82, 85, 89, 93, 96,
+        98, 99, 91, 86, 88, 91, 95, 97, 99, 101,
+        /* Size 16 */
+        64, 60, 57, 65, 77, 77, 78, 79, 80, 82, 84, 86, 88, 89, 91, 91, 60, 61,
+        62, 69, 77, 76, 76, 77, 78, 79, 81, 83, 85, 86, 88, 88, 57, 62, 69, 73,
+        77, 75, 74, 74, 75, 76, 78, 80, 82, 84, 86, 86, 65, 69, 73, 76, 80, 78,
+        77, 78, 78, 79, 80, 82, 83, 85, 87, 87, 77, 77, 77, 80, 82, 82, 81, 81,
+        81, 82, 82, 84, 85, 87, 88, 88, 77, 76, 75, 78, 82, 82, 83, 83, 83, 84,
+        85, 86, 87, 88, 90, 90, 78, 76, 74, 77, 81, 83, 85, 85, 86, 87, 87, 88,
+        89, 90, 91, 91, 79, 77, 74, 78, 81, 83, 85, 87, 88, 89, 89, 90, 91, 92,
+        93, 93, 80, 78, 75, 78, 81, 83, 86, 88, 89, 90, 91, 92, 93, 94, 95, 95,
+        82, 79, 76, 79, 82, 84, 87, 89, 90, 92, 93, 94, 94, 95, 96, 96, 84, 81,
+        78, 80, 82, 85, 87, 89, 91, 93, 94, 95, 96, 97, 97, 97, 86, 83, 80, 82,
+        84, 86, 88, 90, 92, 94, 95, 96, 97, 98, 98, 98, 88, 85, 82, 83, 85, 87,
+        89, 91, 93, 94, 96, 97, 98, 99, 99, 99, 89, 86, 84, 85, 87, 88, 90, 92,
+        94, 95, 97, 98, 99, 99, 100, 100, 91, 88, 86, 87, 88, 90, 91, 93, 95,
+        96, 97, 98, 99, 100, 101, 101, 91, 88, 86, 87, 88, 90, 91, 93, 95, 96,
+        97, 98, 99, 100, 101, 101,
+        /* Size 32 */
+        64, 62, 60, 59, 57, 61, 65, 70, 77, 77, 77, 78, 78, 79, 79, 80, 80, 81,
+        82, 83, 84, 85, 86, 87, 88, 88, 89, 90, 91, 91, 91, 91, 62, 61, 61, 60,
+        60, 63, 67, 72, 77, 77, 77, 77, 77, 78, 78, 79, 79, 80, 81, 81, 82, 83,
+        84, 85, 86, 87, 88, 89, 90, 90, 90, 90, 60, 61, 61, 62, 62, 65, 69, 73,
+        77, 77, 76, 76, 76, 76, 77, 77, 78, 78, 79, 80, 81, 82, 83, 84, 85, 85,
+        86, 87, 88, 88, 88, 88, 59, 60, 62, 64, 66, 68, 71, 74, 77, 76, 76, 75,
+        75, 75, 76, 76, 76, 77, 78, 79, 79, 80, 81, 82, 83, 84, 85, 86, 87, 87,
+        87, 87, 57, 60, 62, 66, 69, 71, 73, 75, 77, 76, 75, 75, 74, 74, 74, 75,
+        75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 86, 86, 86, 61, 63,
+        65, 68, 71, 73, 74, 76, 78, 78, 77, 76, 76, 76, 76, 76, 76, 77, 78, 78,
+        79, 80, 81, 82, 82, 83, 84, 85, 86, 86, 86, 86, 65, 67, 69, 71, 73, 74,
+        76, 78, 80, 79, 78, 78, 77, 77, 78, 78, 78, 78, 79, 80, 80, 81, 82, 82,
+        83, 84, 85, 86, 87, 87, 87, 87, 70, 72, 73, 74, 75, 76, 78, 79, 81, 80,
+        80, 80, 79, 79, 79, 79, 79, 80, 80, 81, 81, 82, 83, 83, 84, 85, 86, 87,
+        87, 87, 87, 87, 77, 77, 77, 77, 77, 78, 80, 81, 82, 82, 82, 81, 81, 81,
+        81, 81, 81, 81, 82, 82, 82, 83, 84, 84, 85, 86, 87, 87, 88, 88, 88, 88,
+        77, 77, 77, 76, 76, 78, 79, 80, 82, 82, 82, 82, 82, 82, 82, 82, 82, 83,
+        83, 83, 84, 84, 85, 85, 86, 87, 87, 88, 89, 89, 89, 89, 77, 77, 76, 76,
+        75, 77, 78, 80, 82, 82, 82, 83, 83, 83, 83, 83, 83, 84, 84, 85, 85, 85,
+        86, 87, 87, 88, 88, 89, 90, 90, 90, 90, 78, 77, 76, 75, 75, 76, 78, 80,
+        81, 82, 83, 83, 84, 84, 84, 85, 85, 85, 86, 86, 86, 87, 87, 88, 88, 89,
+        89, 90, 90, 90, 90, 90, 78, 77, 76, 75, 74, 76, 77, 79, 81, 82, 83, 84,
+        85, 85, 85, 86, 86, 87, 87, 87, 87, 88, 88, 89, 89, 90, 90, 91, 91, 91,
+        91, 91, 79, 78, 76, 75, 74, 76, 77, 79, 81, 82, 83, 84, 85, 86, 86, 87,
+        87, 87, 88, 88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 92, 92, 92, 79, 78,
+        77, 76, 74, 76, 78, 79, 81, 82, 83, 84, 85, 86, 87, 87, 88, 88, 89, 89,
+        89, 90, 90, 91, 91, 92, 92, 92, 93, 93, 93, 93, 80, 79, 77, 76, 75, 76,
+        78, 79, 81, 82, 83, 85, 86, 87, 87, 88, 89, 89, 89, 90, 90, 91, 91, 92,
+        92, 92, 93, 93, 94, 94, 94, 94, 80, 79, 78, 76, 75, 76, 78, 79, 81, 82,
+        83, 85, 86, 87, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 93, 94, 94,
+        95, 95, 95, 95, 81, 80, 78, 77, 76, 77, 78, 80, 81, 83, 84, 85, 87, 87,
+        88, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 94, 95, 95, 95, 95, 95,
+        82, 81, 79, 78, 76, 78, 79, 80, 82, 83, 84, 86, 87, 88, 89, 89, 90, 91,
+        92, 92, 93, 93, 94, 94, 94, 95, 95, 96, 96, 96, 96, 96, 83, 81, 80, 79,
+        77, 78, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 91, 92, 93, 93, 94,
+        94, 95, 95, 95, 96, 96, 97, 97, 97, 97, 84, 82, 81, 79, 78, 79, 80, 81,
+        82, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 93, 94, 94, 95, 95, 96, 96,
+        97, 97, 97, 97, 97, 97, 85, 83, 82, 80, 79, 80, 81, 82, 83, 84, 85, 87,
+        88, 89, 90, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96, 97, 97, 97, 98, 98,
+        98, 98, 86, 84, 83, 81, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
+        92, 93, 94, 94, 95, 95, 96, 96, 97, 97, 98, 98, 98, 98, 98, 98, 87, 85,
+        84, 82, 81, 82, 82, 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 93, 94, 95,
+        95, 96, 96, 97, 97, 98, 98, 98, 99, 99, 99, 99, 88, 86, 85, 83, 82, 82,
+        83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 96, 97, 97,
+        98, 98, 99, 99, 99, 99, 99, 99, 88, 87, 85, 84, 83, 83, 84, 85, 86, 87,
+        88, 89, 90, 91, 92, 92, 93, 94, 95, 95, 96, 97, 97, 98, 98, 99, 99, 99,
+        100, 100, 100, 100, 89, 88, 86, 85, 84, 84, 85, 86, 87, 87, 88, 89, 90,
+        91, 92, 93, 94, 94, 95, 96, 97, 97, 98, 98, 99, 99, 99, 100, 100, 100,
+        100, 100, 90, 89, 87, 86, 85, 85, 86, 87, 87, 88, 89, 90, 91, 92, 92,
+        93, 94, 95, 96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 101,
+        101, 91, 90, 88, 87, 86, 86, 87, 87, 88, 89, 90, 90, 91, 92, 93, 94, 95,
+        95, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 101, 101, 101, 91,
+        90, 88, 87, 86, 86, 87, 87, 88, 89, 90, 90, 91, 92, 93, 94, 95, 95, 96,
+        97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 101, 101, 101, 91, 90, 88,
+        87, 86, 86, 87, 87, 88, 89, 90, 90, 91, 92, 93, 94, 95, 95, 96, 97, 97,
+        98, 98, 99, 99, 100, 100, 101, 101, 101, 101, 101, 91, 90, 88, 87, 86,
+        86, 87, 87, 88, 89, 90, 90, 91, 92, 93, 94, 95, 95, 96, 97, 97, 98, 98,
+        99, 99, 100, 100, 101, 101, 101, 101, 101 },
+      { /* Intra matrices */
+        /* Size 4 */
+        45, 57, 59, 65, 57, 62, 63, 67, 59, 63, 69, 72, 65, 67, 72, 76,
+        /* Size 8 */
+        47, 42, 57, 59, 60, 63, 66, 69, 42, 51, 57, 55, 56, 58, 61, 64, 57, 57,
+        61, 61, 61, 62, 64, 66, 59, 55, 61, 64, 65, 66, 67, 69, 60, 56, 61, 65,
+        67, 69, 70, 71, 63, 58, 62, 66, 69, 71, 72, 74, 66, 61, 64, 67, 70, 72,
+        74, 75, 69, 64, 66, 69, 71, 74, 75, 77,
+        /* Size 16 */
+        47, 44, 41, 48, 56, 57, 58, 59, 60, 61, 62, 64, 65, 67, 68, 68, 44, 45,
+        46, 50, 57, 56, 56, 57, 57, 58, 60, 61, 63, 64, 66, 66, 41, 46, 51, 54,
+        57, 56, 54, 55, 55, 56, 57, 59, 60, 62, 63, 63, 48, 50, 54, 56, 59, 58,
+        57, 57, 57, 58, 59, 60, 62, 63, 64, 64, 56, 57, 57, 59, 61, 60, 60, 60,
+        60, 60, 61, 62, 63, 64, 65, 65, 57, 56, 56, 58, 60, 61, 61, 62, 62, 62,
+        63, 64, 65, 66, 67, 67, 58, 56, 54, 57, 60, 61, 63, 63, 64, 65, 65, 66,
+        66, 67, 68, 68, 59, 57, 55, 57, 60, 62, 63, 64, 65, 66, 67, 67, 68, 69,
+        69, 69, 60, 57, 55, 57, 60, 62, 64, 65, 67, 67, 68, 69, 69, 70, 71, 71,
+        61, 58, 56, 58, 60, 62, 65, 66, 67, 68, 69, 70, 71, 71, 72, 72, 62, 60,
+        57, 59, 61, 63, 65, 67, 68, 69, 70, 71, 72, 72, 73, 73, 64, 61, 59, 60,
+        62, 64, 66, 67, 69, 70, 71, 72, 73, 73, 74, 74, 65, 63, 60, 62, 63, 65,
+        66, 68, 69, 71, 72, 73, 73, 74, 75, 75, 67, 64, 62, 63, 64, 66, 67, 69,
+        70, 71, 72, 73, 74, 75, 75, 75, 68, 66, 63, 64, 65, 67, 68, 69, 71, 72,
+        73, 74, 75, 75, 76, 76, 68, 66, 63, 64, 65, 67, 68, 69, 71, 72, 73, 74,
+        75, 75, 76, 76,
+        /* Size 32 */
+        46, 45, 44, 42, 41, 44, 47, 51, 56, 56, 57, 57, 58, 58, 58, 59, 59, 60,
+        61, 61, 62, 63, 63, 64, 65, 65, 66, 67, 68, 68, 68, 68, 45, 45, 44, 44,
+        43, 46, 49, 52, 56, 56, 56, 57, 57, 57, 57, 58, 58, 59, 59, 60, 61, 61,
+        62, 63, 64, 64, 65, 66, 66, 66, 66, 66, 44, 44, 44, 45, 45, 48, 50, 53,
+        56, 56, 56, 56, 56, 56, 56, 57, 57, 58, 58, 59, 59, 60, 61, 62, 62, 63,
+        64, 65, 65, 65, 65, 65, 42, 44, 45, 46, 48, 50, 52, 54, 56, 56, 56, 55,
+        55, 55, 55, 56, 56, 56, 57, 58, 58, 59, 60, 60, 61, 62, 63, 63, 64, 64,
+        64, 64, 41, 43, 45, 48, 50, 52, 53, 55, 57, 56, 55, 55, 54, 54, 54, 55,
+        55, 55, 56, 57, 57, 58, 59, 59, 60, 61, 62, 62, 63, 63, 63, 63, 44, 46,
+        48, 50, 52, 53, 54, 56, 57, 57, 56, 56, 55, 56, 56, 56, 56, 56, 57, 57,
+        58, 59, 59, 60, 61, 61, 62, 63, 64, 64, 64, 64, 47, 49, 50, 52, 53, 54,
+        56, 57, 58, 58, 58, 57, 57, 57, 57, 57, 57, 58, 58, 58, 59, 60, 60, 61,
+        61, 62, 63, 63, 64, 64, 64, 64, 51, 52, 53, 54, 55, 56, 57, 58, 59, 59,
+        59, 58, 58, 58, 58, 58, 58, 59, 59, 59, 60, 60, 61, 62, 62, 63, 63, 64,
+        65, 65, 65, 65, 56, 56, 56, 56, 57, 57, 58, 59, 60, 60, 60, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 61, 61, 62, 62, 63, 63, 64, 65, 65, 65, 65, 65,
+        56, 56, 56, 56, 56, 57, 58, 59, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61,
+        61, 61, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 66, 66, 57, 56, 56, 56,
+        55, 56, 58, 59, 60, 60, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63,
+        64, 64, 64, 65, 65, 66, 66, 66, 66, 66, 57, 57, 56, 55, 55, 56, 57, 58,
+        60, 60, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 65, 65, 66,
+        66, 67, 67, 67, 67, 67, 58, 57, 56, 55, 54, 55, 57, 58, 60, 60, 61, 62,
+        63, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 68, 68,
+        68, 68, 58, 57, 56, 55, 54, 56, 57, 58, 60, 60, 61, 62, 63, 63, 64, 64,
+        64, 65, 65, 65, 65, 66, 66, 66, 67, 67, 68, 68, 68, 68, 68, 68, 58, 57,
+        56, 55, 54, 56, 57, 58, 60, 60, 61, 62, 63, 64, 64, 64, 65, 65, 66, 66,
+        66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 59, 58, 57, 56, 55, 56,
+        57, 58, 60, 60, 61, 62, 63, 64, 64, 65, 66, 66, 66, 67, 67, 67, 68, 68,
+        68, 69, 69, 69, 70, 70, 70, 70, 59, 58, 57, 56, 55, 56, 57, 58, 60, 61,
+        62, 63, 64, 64, 65, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 70, 70,
+        70, 70, 70, 70, 60, 59, 58, 56, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65,
+        65, 66, 67, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71, 71,
+        61, 59, 58, 57, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 67,
+        68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71, 71, 71, 61, 60, 59, 58,
+        57, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 67, 68, 68, 69, 69, 70,
+        70, 70, 71, 71, 71, 72, 72, 72, 72, 72, 62, 61, 59, 58, 57, 58, 59, 60,
+        61, 62, 63, 64, 65, 65, 66, 67, 68, 68, 69, 69, 70, 70, 71, 71, 71, 72,
+        72, 72, 72, 72, 72, 72, 63, 61, 60, 59, 58, 59, 60, 60, 61, 62, 63, 64,
+        65, 66, 67, 67, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73,
+        73, 73, 63, 62, 61, 60, 59, 59, 60, 61, 62, 63, 64, 64, 65, 66, 67, 68,
+        68, 69, 69, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 73, 73, 64, 63,
+        62, 60, 59, 60, 61, 62, 62, 63, 64, 65, 66, 66, 67, 68, 69, 69, 70, 70,
+        71, 71, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 65, 64, 62, 61, 60, 61,
+        61, 62, 63, 64, 64, 65, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 73,
+        73, 73, 74, 74, 74, 74, 74, 74, 65, 64, 63, 62, 61, 61, 62, 63, 63, 64,
+        65, 66, 66, 67, 68, 69, 69, 70, 70, 71, 72, 72, 72, 73, 73, 74, 74, 74,
+        74, 74, 74, 74, 66, 65, 64, 63, 62, 62, 63, 63, 64, 65, 65, 66, 67, 68,
+        68, 69, 70, 70, 71, 71, 72, 72, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75,
+        67, 66, 65, 63, 62, 63, 63, 64, 65, 65, 66, 67, 67, 68, 69, 69, 70, 71,
+        71, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 68, 66, 65, 64,
+        63, 64, 64, 65, 65, 66, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 73,
+        73, 74, 74, 74, 75, 75, 75, 75, 75, 75, 68, 66, 65, 64, 63, 64, 64, 65,
+        65, 66, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 73, 73, 74, 74, 74,
+        75, 75, 75, 75, 75, 75, 68, 66, 65, 64, 63, 64, 64, 65, 65, 66, 66, 67,
+        68, 68, 69, 70, 70, 71, 71, 72, 72, 73, 73, 74, 74, 74, 75, 75, 75, 75,
+        75, 75, 68, 66, 65, 64, 63, 64, 64, 65, 65, 66, 66, 67, 68, 68, 69, 70,
+        70, 71, 71, 72, 72, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 75 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 67, 80, 89, 67, 77, 85, 91, 80, 85, 92, 95, 89, 91, 95, 97,
+        /* Size 8 */
+        64, 57, 59, 66, 73, 78, 82, 85, 57, 60, 59, 63, 69, 75, 80, 83, 59, 59,
+        68, 71, 75, 78, 82, 84, 66, 63, 71, 77, 80, 82, 84, 86, 73, 69, 75, 80,
+        83, 85, 86, 88, 78, 75, 78, 82, 85, 87, 88, 89, 82, 80, 82, 84, 86, 88,
+        89, 90, 85, 83, 84, 86, 88, 89, 90, 91,
+        /* Size 16 */
+        64, 60, 57, 58, 59, 62, 66, 69, 73, 75, 78, 80, 82, 84, 85, 85, 60, 59,
+        58, 59, 59, 62, 64, 68, 71, 74, 77, 79, 81, 83, 84, 84, 57, 58, 60, 60,
+        59, 61, 63, 66, 69, 72, 75, 77, 80, 81, 83, 83, 58, 59, 60, 61, 63, 65,
+        67, 69, 72, 74, 77, 79, 81, 82, 84, 84, 59, 59, 59, 63, 68, 69, 71, 73,
+        75, 77, 78, 80, 82, 83, 84, 84, 62, 62, 61, 65, 69, 72, 74, 75, 77, 79,
+        80, 82, 83, 84, 85, 85, 66, 64, 63, 67, 71, 74, 77, 78, 80, 81, 82, 83,
+        84, 85, 86, 86, 69, 68, 66, 69, 73, 75, 78, 80, 81, 82, 83, 84, 85, 86,
+        87, 87, 73, 71, 69, 72, 75, 77, 80, 81, 83, 84, 85, 86, 86, 87, 88, 88,
+        75, 74, 72, 74, 77, 79, 81, 82, 84, 85, 86, 86, 87, 88, 88, 88, 78, 77,
+        75, 77, 78, 80, 82, 83, 85, 86, 87, 87, 88, 88, 89, 89, 80, 79, 77, 79,
+        80, 82, 83, 84, 86, 86, 87, 88, 89, 89, 89, 89, 82, 81, 80, 81, 82, 83,
+        84, 85, 86, 87, 88, 89, 89, 89, 90, 90, 84, 83, 81, 82, 83, 84, 85, 86,
+        87, 88, 88, 89, 89, 90, 90, 90, 85, 84, 83, 84, 84, 85, 86, 87, 88, 88,
+        89, 89, 90, 90, 91, 91, 85, 84, 83, 84, 84, 85, 86, 87, 88, 88, 89, 89,
+        90, 90, 91, 91,
+        /* Size 32 */
+        64, 62, 60, 58, 57, 57, 58, 58, 59, 60, 62, 64, 66, 67, 69, 71, 73, 74,
+        75, 77, 78, 79, 80, 81, 82, 83, 84, 85, 85, 85, 85, 85, 62, 61, 60, 59,
+        57, 58, 58, 58, 59, 60, 62, 63, 65, 67, 68, 70, 72, 73, 75, 76, 77, 78,
+        79, 81, 82, 82, 83, 84, 85, 85, 85, 85, 60, 60, 59, 59, 58, 58, 59, 59,
+        59, 60, 62, 63, 64, 66, 68, 69, 71, 72, 74, 75, 77, 78, 79, 80, 81, 82,
+        83, 83, 84, 84, 84, 84, 58, 59, 59, 59, 59, 59, 59, 59, 59, 60, 61, 63,
+        64, 65, 67, 68, 70, 71, 73, 74, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84,
+        84, 84, 57, 57, 58, 59, 60, 60, 60, 59, 59, 60, 61, 62, 63, 65, 66, 68,
+        69, 71, 72, 73, 75, 76, 77, 78, 80, 80, 81, 82, 83, 83, 83, 83, 57, 58,
+        58, 59, 60, 60, 60, 61, 61, 62, 63, 64, 65, 66, 68, 69, 71, 72, 73, 74,
+        76, 77, 78, 79, 80, 81, 82, 83, 83, 83, 83, 83, 58, 58, 59, 59, 60, 60,
+        61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80,
+        81, 81, 82, 83, 84, 84, 84, 84, 58, 58, 59, 59, 59, 61, 62, 64, 65, 66,
+        67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 78, 78, 79, 80, 81, 82, 83, 83,
+        84, 84, 84, 84, 59, 59, 59, 59, 59, 61, 63, 65, 68, 69, 69, 70, 71, 72,
+        73, 74, 75, 76, 77, 77, 78, 79, 80, 81, 82, 82, 83, 84, 84, 84, 84, 84,
+        60, 60, 60, 60, 60, 62, 64, 66, 69, 69, 70, 71, 73, 73, 74, 75, 76, 77,
+        78, 78, 79, 80, 81, 82, 82, 83, 84, 84, 85, 85, 85, 85, 62, 62, 62, 61,
+        61, 63, 65, 67, 69, 70, 72, 73, 74, 75, 75, 76, 77, 78, 79, 79, 80, 81,
+        82, 82, 83, 84, 84, 85, 85, 85, 85, 85, 64, 63, 63, 63, 62, 64, 66, 68,
+        70, 71, 73, 74, 75, 76, 77, 78, 78, 79, 80, 80, 81, 82, 82, 83, 84, 84,
+        85, 85, 86, 86, 86, 86, 66, 65, 64, 64, 63, 65, 67, 69, 71, 73, 74, 75,
+        77, 77, 78, 79, 80, 80, 81, 81, 82, 83, 83, 84, 84, 85, 85, 86, 86, 86,
+        86, 86, 67, 67, 66, 65, 65, 66, 68, 70, 72, 73, 75, 76, 77, 78, 79, 80,
+        80, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 86, 86, 86, 86, 69, 68,
+        68, 67, 66, 68, 69, 71, 73, 74, 75, 77, 78, 79, 80, 80, 81, 82, 82, 83,
+        83, 84, 84, 85, 85, 86, 86, 86, 87, 87, 87, 87, 71, 70, 69, 68, 68, 69,
+        71, 72, 74, 75, 76, 78, 79, 80, 80, 81, 82, 83, 83, 84, 84, 85, 85, 85,
+        86, 86, 87, 87, 87, 87, 87, 87, 73, 72, 71, 70, 69, 71, 72, 73, 75, 76,
+        77, 78, 80, 80, 81, 82, 83, 83, 84, 84, 85, 85, 86, 86, 86, 87, 87, 87,
+        88, 88, 88, 88, 74, 73, 72, 71, 71, 72, 73, 74, 76, 77, 78, 79, 80, 81,
+        82, 83, 83, 84, 84, 85, 85, 86, 86, 86, 87, 87, 87, 88, 88, 88, 88, 88,
+        75, 75, 74, 73, 72, 73, 74, 75, 77, 78, 79, 80, 81, 82, 82, 83, 84, 84,
+        85, 85, 86, 86, 86, 87, 87, 87, 88, 88, 88, 88, 88, 88, 77, 76, 75, 74,
+        73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84, 85, 85, 86, 86, 87,
+        87, 87, 88, 88, 88, 88, 89, 89, 89, 89, 78, 77, 77, 76, 75, 76, 77, 78,
+        78, 79, 80, 81, 82, 83, 83, 84, 85, 85, 86, 86, 87, 87, 87, 88, 88, 88,
+        88, 89, 89, 89, 89, 89, 79, 78, 78, 77, 76, 77, 78, 78, 79, 80, 81, 82,
+        83, 83, 84, 85, 85, 86, 86, 87, 87, 87, 88, 88, 88, 88, 89, 89, 89, 89,
+        89, 89, 80, 79, 79, 78, 77, 78, 79, 79, 80, 81, 82, 82, 83, 84, 84, 85,
+        86, 86, 86, 87, 87, 88, 88, 88, 89, 89, 89, 89, 89, 89, 89, 89, 81, 81,
+        80, 79, 78, 79, 80, 80, 81, 82, 82, 83, 84, 84, 85, 85, 86, 86, 87, 87,
+        88, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 82, 82, 81, 80, 80, 80,
+        81, 81, 82, 82, 83, 84, 84, 85, 85, 86, 86, 87, 87, 88, 88, 88, 89, 89,
+        89, 89, 89, 90, 90, 90, 90, 90, 83, 82, 82, 81, 80, 81, 81, 82, 82, 83,
+        84, 84, 85, 85, 86, 86, 87, 87, 87, 88, 88, 88, 89, 89, 89, 89, 90, 90,
+        90, 90, 90, 90, 84, 83, 83, 82, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86,
+        86, 87, 87, 87, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 90, 90, 90,
+        85, 84, 83, 83, 82, 83, 83, 83, 84, 84, 85, 85, 86, 86, 86, 87, 87, 88,
+        88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 90, 90, 90, 90, 85, 85, 84, 84,
+        83, 83, 84, 84, 84, 85, 85, 86, 86, 86, 87, 87, 88, 88, 88, 89, 89, 89,
+        89, 90, 90, 90, 90, 90, 91, 91, 91, 91, 85, 85, 84, 84, 83, 83, 84, 84,
+        84, 85, 85, 86, 86, 86, 87, 87, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90,
+        90, 90, 91, 91, 91, 91, 85, 85, 84, 84, 83, 83, 84, 84, 84, 85, 85, 86,
+        86, 86, 87, 87, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 90, 91, 91,
+        91, 91, 85, 85, 84, 84, 83, 83, 84, 84, 84, 85, 85, 86, 86, 86, 87, 87,
+        88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 90, 91, 91, 91, 91 },
+      { /* Intra matrices */
+        /* Size 4 */
+        48, 50, 60, 68, 50, 58, 64, 69, 60, 64, 70, 72, 68, 69, 72, 74,
+        /* Size 8 */
+        52, 45, 47, 53, 59, 64, 68, 70, 45, 48, 47, 51, 56, 61, 65, 68, 47, 47,
+        55, 58, 61, 64, 67, 69, 53, 51, 58, 62, 65, 67, 69, 71, 59, 56, 61, 65,
+        68, 70, 71, 72, 64, 61, 64, 67, 70, 71, 72, 73, 68, 65, 67, 69, 71, 72,
+        73, 74, 70, 68, 69, 71, 72, 73, 74, 75,
+        /* Size 16 */
+        51, 48, 45, 46, 47, 49, 52, 55, 58, 61, 63, 65, 67, 68, 69, 69, 48, 47,
+        46, 46, 47, 49, 51, 54, 57, 59, 62, 63, 65, 67, 68, 68, 45, 46, 48, 47,
+        47, 48, 50, 53, 55, 58, 60, 62, 64, 66, 67, 67, 46, 46, 47, 49, 50, 52,
+        53, 55, 58, 60, 62, 63, 65, 66, 68, 68, 47, 47, 47, 50, 54, 56, 57, 59,
+        60, 62, 63, 65, 66, 67, 68, 68, 49, 49, 48, 52, 56, 57, 59, 61, 62, 63,
+        65, 66, 67, 68, 69, 69, 52, 51, 50, 53, 57, 59, 62, 63, 64, 65, 66, 67,
+        68, 69, 70, 70, 55, 54, 53, 55, 59, 61, 63, 64, 66, 67, 68, 68, 69, 70,
+        71, 71, 58, 57, 55, 58, 60, 62, 64, 66, 67, 68, 69, 69, 70, 71, 71, 71,
+        61, 59, 58, 60, 62, 63, 65, 67, 68, 69, 70, 70, 71, 71, 72, 72, 63, 62,
+        60, 62, 63, 65, 66, 68, 69, 70, 70, 71, 71, 72, 72, 72, 65, 63, 62, 63,
+        65, 66, 67, 68, 69, 70, 71, 71, 72, 72, 73, 73, 67, 65, 64, 65, 66, 67,
+        68, 69, 70, 71, 71, 72, 72, 73, 73, 73, 68, 67, 66, 66, 67, 68, 69, 70,
+        71, 71, 72, 72, 73, 73, 73, 73, 69, 68, 67, 68, 68, 69, 70, 71, 71, 72,
+        72, 73, 73, 73, 74, 74, 69, 68, 67, 68, 68, 69, 70, 71, 71, 72, 72, 73,
+        73, 73, 74, 74,
+        /* Size 32 */
+        51, 49, 47, 46, 45, 45, 45, 46, 46, 48, 49, 50, 52, 53, 55, 56, 58, 59,
+        60, 61, 63, 64, 64, 65, 66, 67, 68, 68, 69, 69, 69, 69, 49, 48, 47, 46,
+        45, 46, 46, 46, 46, 48, 49, 50, 52, 53, 54, 56, 57, 58, 59, 61, 62, 63,
+        64, 65, 66, 66, 67, 68, 68, 68, 68, 68, 47, 47, 47, 46, 46, 46, 46, 46,
+        46, 47, 49, 50, 51, 52, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66,
+        66, 67, 68, 68, 68, 68, 46, 46, 46, 47, 47, 47, 47, 47, 46, 47, 48, 49,
+        50, 52, 53, 54, 56, 57, 58, 59, 61, 61, 62, 63, 64, 65, 66, 67, 67, 67,
+        67, 67, 45, 45, 46, 47, 47, 47, 47, 47, 47, 47, 48, 49, 50, 51, 52, 54,
+        55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 65, 66, 67, 67, 67, 67, 45, 46,
+        46, 47, 47, 47, 48, 48, 48, 49, 50, 51, 51, 53, 54, 55, 56, 57, 58, 59,
+        61, 61, 62, 63, 64, 65, 66, 66, 67, 67, 67, 67, 45, 46, 46, 47, 47, 48,
+        48, 49, 50, 51, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
+        65, 65, 66, 67, 67, 67, 67, 67, 46, 46, 46, 47, 47, 48, 49, 50, 52, 52,
+        53, 54, 55, 56, 57, 58, 58, 59, 60, 61, 62, 63, 64, 64, 65, 66, 66, 67,
+        68, 68, 68, 68, 46, 46, 46, 46, 47, 48, 50, 52, 54, 54, 55, 56, 57, 57,
+        58, 59, 60, 60, 61, 62, 63, 64, 64, 65, 66, 66, 67, 67, 68, 68, 68, 68,
+        48, 48, 47, 47, 47, 49, 51, 52, 54, 55, 56, 57, 58, 59, 59, 60, 61, 61,
+        62, 63, 64, 64, 65, 66, 66, 67, 67, 68, 68, 68, 68, 68, 49, 49, 49, 48,
+        48, 50, 51, 53, 55, 56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 64, 64, 65,
+        66, 66, 67, 67, 68, 68, 69, 69, 69, 69, 50, 50, 50, 49, 49, 51, 52, 54,
+        56, 57, 58, 59, 60, 61, 61, 62, 63, 63, 64, 65, 65, 66, 66, 67, 67, 68,
+        68, 69, 69, 69, 69, 69, 52, 52, 51, 50, 50, 51, 53, 55, 57, 58, 59, 60,
+        61, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 69, 69,
+        69, 69, 53, 53, 52, 52, 51, 53, 54, 56, 57, 59, 60, 61, 62, 63, 63, 64,
+        65, 65, 66, 66, 67, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 70, 55, 54,
+        54, 53, 52, 54, 55, 57, 58, 59, 60, 61, 63, 63, 64, 65, 65, 66, 66, 67,
+        67, 68, 68, 68, 69, 69, 69, 70, 70, 70, 70, 70, 56, 56, 55, 54, 54, 55,
+        56, 58, 59, 60, 61, 62, 63, 64, 65, 65, 66, 66, 67, 67, 68, 68, 68, 69,
+        69, 70, 70, 70, 70, 70, 70, 70, 58, 57, 56, 56, 55, 56, 57, 58, 60, 61,
+        62, 63, 64, 65, 65, 66, 67, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 71,
+        71, 71, 71, 71, 59, 58, 58, 57, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
+        66, 66, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71, 71, 71,
+        60, 59, 59, 58, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 67, 68,
+        68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 61, 61, 60, 59,
+        59, 59, 60, 61, 62, 63, 64, 65, 65, 66, 67, 67, 68, 68, 69, 69, 70, 70,
+        70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 63, 62, 61, 61, 60, 61, 61, 62,
+        63, 64, 64, 65, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 70, 71, 71, 71,
+        71, 72, 72, 72, 72, 72, 64, 63, 62, 61, 61, 61, 62, 63, 64, 64, 65, 66,
+        66, 67, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72,
+        72, 72, 64, 64, 63, 62, 62, 62, 63, 64, 64, 65, 66, 66, 67, 67, 68, 68,
+        69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 65, 65,
+        64, 63, 63, 63, 64, 64, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70,
+        71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72, 66, 66, 65, 64, 64, 64,
+        65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72,
+        72, 72, 72, 73, 73, 73, 73, 73, 67, 66, 66, 65, 65, 65, 65, 66, 66, 67,
+        67, 68, 68, 69, 69, 70, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 73, 73,
+        73, 73, 73, 73, 68, 67, 66, 66, 65, 66, 66, 66, 67, 67, 68, 68, 69, 69,
+        69, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73,
+        68, 68, 67, 67, 66, 66, 67, 67, 67, 68, 68, 69, 69, 69, 70, 70, 71, 71,
+        71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 69, 68, 68, 67,
+        67, 67, 67, 68, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72,
+        72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 69, 68, 68, 67, 67, 67, 67, 68,
+        68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 72, 73, 73,
+        73, 73, 73, 73, 73, 73, 69, 68, 68, 67, 67, 67, 67, 68, 68, 68, 69, 69,
+        69, 70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73,
+        73, 73, 69, 68, 68, 67, 67, 67, 67, 68, 68, 68, 69, 69, 69, 70, 70, 70,
+        71, 71, 71, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 77, 79, 85, 77, 81, 83, 86, 79, 83, 89, 91, 85, 86, 91, 94,
+        /* Size 8 */
+        64, 58, 74, 76, 77, 80, 83, 85, 58, 68, 75, 72, 73, 75, 78, 81, 74, 75,
+        78, 78, 78, 79, 81, 83, 76, 72, 78, 80, 82, 83, 84, 85, 77, 73, 78, 82,
+        84, 85, 87, 88, 80, 75, 79, 83, 85, 87, 89, 90, 83, 78, 81, 84, 87, 89,
+        90, 91, 85, 81, 83, 85, 88, 90, 91, 92,
+        /* Size 16 */
+        64, 61, 58, 65, 74, 75, 76, 76, 77, 78, 80, 81, 83, 84, 85, 85, 61, 62,
+        63, 68, 74, 74, 74, 74, 75, 76, 77, 79, 80, 82, 83, 83, 58, 63, 68, 71,
+        75, 73, 72, 72, 73, 74, 75, 77, 78, 80, 81, 81, 65, 68, 71, 74, 76, 76,
+        75, 75, 75, 76, 77, 78, 79, 81, 82, 82, 74, 74, 75, 76, 78, 78, 78, 78,
+        78, 78, 79, 80, 81, 82, 83, 83, 75, 74, 73, 76, 78, 79, 79, 79, 79, 80,
+        81, 81, 82, 83, 84, 84, 76, 74, 72, 75, 78, 79, 80, 81, 82, 82, 83, 83,
+        84, 85, 85, 85, 76, 74, 72, 75, 78, 79, 81, 82, 83, 83, 84, 85, 85, 86,
+        86, 86, 77, 75, 73, 75, 78, 79, 82, 83, 84, 85, 85, 86, 87, 87, 88, 88,
+        78, 76, 74, 76, 78, 80, 82, 83, 85, 85, 86, 87, 88, 88, 89, 89, 80, 77,
+        75, 77, 79, 81, 83, 84, 85, 86, 87, 88, 89, 89, 90, 90, 81, 79, 77, 78,
+        80, 81, 83, 85, 86, 87, 88, 89, 89, 90, 90, 90, 83, 80, 78, 79, 81, 82,
+        84, 85, 87, 88, 89, 89, 90, 91, 91, 91, 84, 82, 80, 81, 82, 83, 85, 86,
+        87, 88, 89, 90, 91, 91, 92, 92, 85, 83, 81, 82, 83, 84, 85, 86, 88, 89,
+        90, 90, 91, 92, 92, 92, 85, 83, 81, 82, 83, 84, 85, 86, 88, 89, 90, 90,
+        91, 92, 92, 92,
+        /* Size 32 */
+        64, 62, 61, 59, 58, 61, 65, 69, 74, 74, 75, 75, 76, 76, 76, 77, 77, 78,
+        78, 79, 80, 80, 81, 82, 83, 83, 84, 85, 85, 85, 85, 85, 62, 62, 61, 61,
+        60, 63, 67, 70, 74, 74, 74, 75, 75, 75, 75, 76, 76, 77, 77, 78, 79, 79,
+        80, 81, 81, 82, 83, 83, 84, 84, 84, 84, 61, 61, 62, 62, 63, 65, 68, 71,
+        74, 74, 74, 74, 74, 74, 74, 75, 75, 76, 76, 77, 77, 78, 79, 80, 80, 81,
+        82, 82, 83, 83, 83, 83, 59, 61, 62, 64, 65, 67, 70, 72, 74, 74, 74, 73,
+        73, 73, 73, 74, 74, 74, 75, 76, 76, 77, 78, 78, 79, 80, 81, 81, 82, 82,
+        82, 82, 58, 60, 63, 65, 68, 70, 71, 73, 75, 74, 73, 73, 72, 72, 72, 73,
+        73, 73, 74, 75, 75, 76, 77, 77, 78, 79, 80, 80, 81, 81, 81, 81, 61, 63,
+        65, 67, 70, 71, 72, 74, 76, 75, 74, 74, 73, 74, 74, 74, 74, 74, 75, 76,
+        76, 77, 77, 78, 79, 79, 80, 81, 81, 81, 81, 81, 65, 67, 68, 70, 71, 72,
+        74, 75, 76, 76, 76, 75, 75, 75, 75, 75, 75, 76, 76, 76, 77, 78, 78, 79,
+        79, 80, 81, 81, 82, 82, 82, 82, 69, 70, 71, 72, 73, 74, 75, 76, 77, 77,
+        77, 76, 76, 76, 76, 76, 76, 77, 77, 77, 78, 78, 79, 79, 80, 81, 81, 82,
+        82, 82, 82, 82, 74, 74, 74, 74, 75, 76, 76, 77, 78, 78, 78, 78, 78, 78,
+        78, 78, 78, 78, 78, 78, 79, 79, 80, 80, 81, 81, 82, 82, 83, 83, 83, 83,
+        74, 74, 74, 74, 74, 75, 76, 77, 78, 78, 78, 78, 78, 78, 78, 78, 78, 79,
+        79, 79, 80, 80, 81, 81, 81, 82, 82, 83, 83, 83, 83, 83, 75, 74, 74, 74,
+        73, 74, 76, 77, 78, 78, 79, 79, 79, 79, 79, 79, 79, 80, 80, 80, 81, 81,
+        81, 82, 82, 83, 83, 84, 84, 84, 84, 84, 75, 75, 74, 73, 73, 74, 75, 76,
+        78, 78, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 83, 83, 83,
+        84, 84, 85, 85, 85, 85, 76, 75, 74, 73, 72, 73, 75, 76, 78, 78, 79, 80,
+        80, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, 84, 84, 85, 85, 85, 85,
+        85, 85, 76, 75, 74, 73, 72, 74, 75, 76, 78, 78, 79, 80, 81, 81, 81, 82,
+        82, 82, 83, 83, 83, 84, 84, 84, 84, 85, 85, 86, 86, 86, 86, 86, 76, 75,
+        74, 73, 72, 74, 75, 76, 78, 78, 79, 80, 81, 81, 82, 82, 83, 83, 83, 84,
+        84, 84, 85, 85, 85, 85, 86, 86, 86, 86, 86, 86, 77, 76, 75, 74, 73, 74,
+        75, 76, 78, 78, 79, 80, 81, 82, 82, 83, 83, 84, 84, 84, 85, 85, 85, 86,
+        86, 86, 86, 87, 87, 87, 87, 87, 77, 76, 75, 74, 73, 74, 75, 76, 78, 78,
+        79, 81, 82, 82, 83, 83, 84, 84, 85, 85, 85, 86, 86, 86, 87, 87, 87, 87,
+        88, 88, 88, 88, 78, 77, 76, 74, 73, 74, 76, 77, 78, 79, 80, 81, 82, 82,
+        83, 84, 84, 85, 85, 85, 86, 86, 86, 87, 87, 87, 88, 88, 88, 88, 88, 88,
+        78, 77, 76, 75, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 83, 84, 85, 85,
+        85, 86, 86, 87, 87, 87, 88, 88, 88, 88, 89, 89, 89, 89, 79, 78, 77, 76,
+        75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84, 85, 85, 86, 86, 87, 87,
+        87, 88, 88, 88, 89, 89, 89, 89, 89, 89, 80, 79, 77, 76, 75, 76, 77, 78,
+        79, 80, 81, 82, 83, 83, 84, 85, 85, 86, 86, 87, 87, 88, 88, 88, 89, 89,
+        89, 89, 90, 90, 90, 90, 80, 79, 78, 77, 76, 77, 78, 78, 79, 80, 81, 82,
+        83, 84, 84, 85, 86, 86, 87, 87, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90,
+        90, 90, 81, 80, 79, 78, 77, 77, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85,
+        86, 86, 87, 87, 88, 88, 89, 89, 89, 90, 90, 90, 90, 90, 90, 90, 82, 81,
+        80, 78, 77, 78, 79, 79, 80, 81, 82, 83, 83, 84, 85, 86, 86, 87, 87, 88,
+        88, 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, 83, 81, 80, 79, 78, 79,
+        79, 80, 81, 81, 82, 83, 84, 84, 85, 86, 87, 87, 88, 88, 89, 89, 89, 90,
+        90, 90, 91, 91, 91, 91, 91, 91, 83, 82, 81, 80, 79, 79, 80, 81, 81, 82,
+        83, 83, 84, 85, 85, 86, 87, 87, 88, 88, 89, 89, 90, 90, 90, 91, 91, 91,
+        91, 91, 91, 91, 84, 83, 82, 81, 80, 80, 81, 81, 82, 82, 83, 84, 85, 85,
+        86, 86, 87, 88, 88, 89, 89, 89, 90, 90, 91, 91, 91, 91, 92, 92, 92, 92,
+        85, 83, 82, 81, 80, 81, 81, 82, 82, 83, 84, 84, 85, 86, 86, 87, 87, 88,
+        88, 89, 89, 90, 90, 90, 91, 91, 91, 92, 92, 92, 92, 92, 85, 84, 83, 82,
+        81, 81, 82, 82, 83, 83, 84, 85, 85, 86, 86, 87, 88, 88, 89, 89, 90, 90,
+        90, 91, 91, 91, 92, 92, 92, 92, 92, 92, 85, 84, 83, 82, 81, 81, 82, 82,
+        83, 83, 84, 85, 85, 86, 86, 87, 88, 88, 89, 89, 90, 90, 90, 91, 91, 91,
+        92, 92, 92, 92, 92, 92, 85, 84, 83, 82, 81, 81, 82, 82, 83, 83, 84, 85,
+        85, 86, 86, 87, 88, 88, 89, 89, 90, 90, 90, 91, 91, 91, 92, 92, 92, 92,
+        92, 92, 85, 84, 83, 82, 81, 81, 82, 82, 83, 83, 84, 85, 85, 86, 86, 87,
+        88, 88, 89, 89, 90, 90, 90, 91, 91, 91, 92, 92, 92, 92, 92, 92 },
+      { /* Intra matrices */
+        /* Size 4 */
+        48, 59, 60, 65, 59, 62, 64, 66, 60, 64, 68, 70, 65, 66, 70, 73,
+        /* Size 8 */
+        50, 45, 59, 60, 61, 63, 66, 68, 45, 54, 59, 57, 57, 59, 62, 64, 59, 59,
+        62, 61, 61, 62, 64, 66, 60, 57, 61, 64, 65, 66, 67, 68, 61, 57, 61, 65,
+        67, 68, 69, 70, 63, 59, 62, 66, 68, 70, 71, 72, 66, 62, 64, 67, 69, 71,
+        72, 73, 68, 64, 66, 68, 70, 72, 73, 74,
+        /* Size 16 */
+        50, 47, 45, 51, 58, 59, 59, 60, 61, 62, 63, 64, 65, 66, 67, 67, 47, 48,
+        49, 53, 58, 58, 58, 58, 59, 60, 61, 62, 63, 64, 65, 65, 45, 49, 53, 56,
+        58, 57, 56, 57, 57, 58, 59, 60, 61, 63, 64, 64, 51, 53, 56, 58, 60, 59,
+        59, 59, 59, 60, 60, 61, 62, 63, 65, 65, 58, 58, 58, 60, 62, 61, 61, 61,
+        61, 61, 62, 63, 63, 64, 65, 65, 59, 58, 57, 59, 61, 62, 62, 62, 62, 63,
+        63, 64, 65, 66, 66, 66, 59, 58, 56, 59, 61, 62, 63, 64, 64, 65, 65, 66,
+        66, 67, 67, 67, 60, 58, 57, 59, 61, 62, 64, 64, 65, 66, 66, 67, 67, 68,
+        68, 68, 61, 59, 57, 59, 61, 62, 64, 65, 66, 67, 67, 68, 68, 69, 69, 69,
+        62, 60, 58, 60, 61, 63, 65, 66, 67, 68, 68, 69, 69, 70, 70, 70, 63, 61,
+        59, 60, 62, 63, 65, 66, 67, 68, 69, 70, 70, 71, 71, 71, 64, 62, 60, 61,
+        63, 64, 66, 67, 68, 69, 70, 70, 71, 71, 72, 72, 65, 63, 61, 62, 63, 65,
+        66, 67, 68, 69, 70, 71, 71, 72, 72, 72, 66, 64, 63, 63, 64, 66, 67, 68,
+        69, 70, 71, 71, 72, 72, 73, 73, 67, 65, 64, 65, 65, 66, 67, 68, 69, 70,
+        71, 72, 72, 73, 73, 73, 67, 65, 64, 65, 65, 66, 67, 68, 69, 70, 71, 72,
+        72, 73, 73, 73,
+        /* Size 32 */
+        49, 48, 47, 46, 45, 47, 50, 54, 58, 58, 58, 59, 59, 59, 60, 60, 60, 61,
+        61, 62, 62, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67, 67, 48, 48, 47, 47,
+        46, 49, 52, 55, 58, 58, 58, 58, 58, 58, 59, 59, 59, 60, 60, 61, 61, 62,
+        63, 63, 64, 64, 65, 65, 66, 66, 66, 66, 47, 47, 48, 48, 48, 50, 53, 55,
+        58, 58, 58, 58, 57, 58, 58, 58, 58, 59, 59, 60, 61, 61, 62, 62, 63, 63,
+        64, 65, 65, 65, 65, 65, 46, 47, 48, 49, 51, 52, 54, 56, 58, 58, 57, 57,
+        57, 57, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 63, 63, 64, 64, 64,
+        64, 64, 45, 46, 48, 51, 53, 54, 55, 57, 58, 58, 57, 57, 56, 56, 56, 57,
+        57, 57, 58, 58, 59, 59, 60, 60, 61, 62, 62, 63, 63, 63, 63, 63, 47, 49,
+        50, 52, 54, 55, 56, 58, 59, 58, 58, 58, 57, 57, 57, 57, 58, 58, 58, 59,
+        59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 50, 52, 53, 54, 55, 56,
+        57, 59, 60, 59, 59, 59, 58, 58, 58, 58, 59, 59, 59, 60, 60, 61, 61, 62,
+        62, 63, 63, 64, 64, 64, 64, 64, 54, 55, 55, 56, 57, 58, 59, 60, 60, 60,
+        60, 60, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 62, 62, 63, 63, 64, 64,
+        65, 65, 65, 65, 58, 58, 58, 58, 58, 59, 60, 60, 61, 61, 61, 61, 61, 61,
+        61, 61, 61, 61, 61, 61, 62, 62, 62, 63, 63, 64, 64, 65, 65, 65, 65, 65,
+        58, 58, 58, 58, 58, 58, 59, 60, 61, 61, 61, 61, 61, 61, 61, 61, 61, 62,
+        62, 62, 62, 63, 63, 63, 64, 64, 65, 65, 66, 66, 66, 66, 58, 58, 58, 57,
+        57, 58, 59, 60, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63,
+        64, 64, 64, 65, 65, 66, 66, 66, 66, 66, 59, 58, 58, 57, 57, 58, 59, 60,
+        61, 61, 62, 62, 62, 63, 63, 63, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65,
+        66, 66, 67, 67, 67, 67, 59, 58, 57, 57, 56, 57, 58, 59, 61, 61, 62, 62,
+        63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67,
+        67, 67, 59, 58, 58, 57, 56, 57, 58, 59, 61, 61, 62, 63, 63, 64, 64, 64,
+        64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 60, 59,
+        58, 57, 56, 57, 58, 59, 61, 61, 62, 63, 63, 64, 64, 65, 65, 65, 65, 66,
+        66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 60, 59, 58, 57, 57, 57,
+        58, 60, 61, 61, 62, 63, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 67,
+        68, 68, 68, 68, 69, 69, 69, 69, 60, 59, 58, 58, 57, 58, 59, 60, 61, 61,
+        62, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 69, 69,
+        69, 69, 69, 69, 61, 60, 59, 58, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65,
+        65, 66, 66, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+        61, 60, 59, 59, 58, 58, 59, 60, 61, 62, 63, 63, 64, 65, 65, 66, 67, 67,
+        67, 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 62, 61, 60, 59,
+        58, 59, 60, 60, 61, 62, 63, 64, 65, 65, 66, 66, 67, 67, 68, 68, 68, 69,
+        69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 62, 61, 61, 60, 59, 59, 60, 61,
+        62, 62, 63, 64, 65, 65, 66, 67, 67, 68, 68, 68, 69, 69, 69, 70, 70, 70,
+        70, 70, 71, 71, 71, 71, 63, 62, 61, 60, 59, 60, 61, 61, 62, 63, 63, 64,
+        65, 66, 66, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71,
+        71, 71, 64, 63, 62, 61, 60, 60, 61, 62, 62, 63, 64, 65, 65, 66, 66, 67,
+        68, 68, 68, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 64, 63,
+        62, 61, 60, 61, 62, 62, 63, 63, 64, 65, 66, 66, 67, 67, 68, 68, 69, 69,
+        70, 70, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 65, 64, 63, 62, 61, 62,
+        62, 63, 63, 64, 64, 65, 66, 66, 67, 68, 68, 69, 69, 69, 70, 70, 70, 71,
+        71, 71, 71, 72, 72, 72, 72, 72, 65, 64, 63, 63, 62, 62, 63, 63, 64, 64,
+        65, 65, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 71, 71, 71, 71, 72, 72,
+        72, 72, 72, 72, 66, 65, 64, 63, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67,
+        67, 68, 69, 69, 69, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72,
+        66, 65, 65, 64, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69,
+        70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 67, 66, 65, 64,
+        63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 69, 70, 70, 71, 71,
+        71, 72, 72, 72, 72, 73, 73, 73, 73, 73, 67, 66, 65, 64, 63, 64, 64, 65,
+        65, 66, 66, 67, 67, 68, 68, 69, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72,
+        72, 73, 73, 73, 73, 73, 67, 66, 65, 64, 63, 64, 64, 65, 65, 66, 66, 67,
+        67, 68, 68, 69, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73,
+        73, 73, 67, 66, 65, 64, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69,
+        69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 73 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 66, 76, 82, 66, 74, 79, 83, 76, 79, 84, 86, 82, 83, 86, 87,
+        /* Size 8 */
+        64, 58, 60, 65, 71, 75, 77, 79, 58, 61, 60, 63, 68, 72, 75, 78, 60, 60,
+        67, 69, 72, 75, 77, 79, 65, 63, 69, 73, 75, 77, 79, 80, 71, 68, 72, 75,
+        78, 79, 80, 81, 75, 72, 75, 77, 79, 80, 81, 82, 77, 75, 77, 79, 80, 81,
+        82, 82, 79, 78, 79, 80, 81, 82, 82, 83,
+        /* Size 16 */
+        64, 61, 58, 59, 60, 62, 65, 68, 71, 72, 75, 76, 77, 78, 79, 79, 61, 60,
+        60, 60, 60, 62, 64, 67, 69, 71, 73, 75, 76, 77, 79, 79, 58, 60, 61, 61,
+        60, 62, 63, 66, 68, 70, 72, 74, 75, 77, 78, 78, 59, 60, 61, 62, 63, 65,
+        66, 68, 70, 72, 73, 75, 76, 77, 78, 78, 60, 60, 60, 63, 67, 68, 69, 71,
+        72, 73, 75, 76, 77, 78, 79, 79, 62, 62, 62, 65, 68, 70, 71, 73, 74, 75,
+        76, 77, 78, 79, 79, 79, 65, 64, 63, 66, 69, 71, 73, 74, 75, 76, 77, 78,
+        79, 79, 80, 80, 68, 67, 66, 68, 71, 73, 74, 75, 77, 77, 78, 79, 79, 80,
+        80, 80, 71, 69, 68, 70, 72, 74, 75, 77, 78, 78, 79, 80, 80, 80, 81, 81,
+        72, 71, 70, 72, 73, 75, 76, 77, 78, 79, 80, 80, 81, 81, 81, 81, 75, 73,
+        72, 73, 75, 76, 77, 78, 79, 80, 80, 81, 81, 81, 82, 82, 76, 75, 74, 75,
+        76, 77, 78, 79, 80, 80, 81, 81, 81, 82, 82, 82, 77, 76, 75, 76, 77, 78,
+        79, 79, 80, 81, 81, 81, 82, 82, 82, 82, 78, 77, 77, 77, 78, 79, 79, 80,
+        80, 81, 81, 82, 82, 82, 83, 83, 79, 79, 78, 78, 79, 79, 80, 80, 81, 81,
+        82, 82, 82, 83, 83, 83, 79, 79, 78, 78, 79, 79, 80, 80, 81, 81, 82, 82,
+        82, 83, 83, 83,
+        /* Size 32 */
+        64, 62, 61, 60, 58, 59, 59, 59, 60, 61, 62, 64, 65, 67, 68, 69, 71, 71,
+        72, 73, 75, 75, 76, 77, 77, 78, 78, 79, 79, 79, 79, 79, 62, 61, 61, 60,
+        59, 59, 59, 60, 60, 61, 62, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74, 75,
+        75, 76, 77, 77, 78, 78, 79, 79, 79, 79, 61, 61, 60, 60, 60, 60, 60, 60,
+        60, 61, 62, 63, 64, 65, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 76, 77,
+        77, 78, 79, 79, 79, 79, 60, 60, 60, 60, 60, 60, 60, 60, 60, 61, 62, 63,
+        64, 65, 66, 67, 69, 70, 71, 72, 73, 73, 74, 75, 76, 76, 77, 78, 78, 78,
+        78, 78, 58, 59, 60, 60, 61, 61, 61, 60, 60, 61, 62, 63, 63, 64, 66, 67,
+        68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 78, 78, 59, 59,
+        60, 60, 61, 61, 61, 61, 62, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
+        73, 73, 74, 75, 76, 76, 77, 77, 78, 78, 78, 78, 59, 59, 60, 60, 61, 61,
+        62, 63, 63, 64, 65, 66, 66, 67, 68, 69, 70, 71, 72, 72, 73, 74, 75, 75,
+        76, 77, 77, 78, 78, 78, 78, 78, 59, 60, 60, 60, 60, 61, 63, 64, 65, 66,
+        66, 67, 68, 69, 69, 70, 71, 72, 72, 73, 74, 75, 75, 76, 77, 77, 77, 78,
+        78, 78, 78, 78, 60, 60, 60, 60, 60, 62, 63, 65, 67, 67, 68, 69, 69, 70,
+        71, 71, 72, 73, 73, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79, 79, 79,
+        61, 61, 61, 61, 61, 62, 64, 66, 67, 68, 69, 70, 70, 71, 72, 72, 73, 73,
+        74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79, 79, 79, 79, 62, 62, 62, 62,
+        62, 63, 65, 66, 68, 69, 70, 71, 71, 72, 73, 73, 74, 74, 75, 75, 76, 76,
+        77, 77, 78, 78, 79, 79, 79, 79, 79, 79, 64, 64, 63, 63, 63, 64, 66, 67,
+        69, 70, 71, 71, 72, 73, 73, 74, 75, 75, 76, 76, 76, 77, 77, 78, 78, 79,
+        79, 79, 80, 80, 80, 80, 65, 65, 64, 64, 63, 65, 66, 68, 69, 70, 71, 72,
+        73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 78, 79, 79, 79, 80, 80, 80,
+        80, 80, 67, 66, 65, 65, 64, 66, 67, 69, 70, 71, 72, 73, 74, 74, 75, 75,
+        76, 76, 77, 77, 78, 78, 78, 79, 79, 79, 80, 80, 80, 80, 80, 80, 68, 67,
+        67, 66, 66, 67, 68, 69, 71, 72, 73, 73, 74, 75, 75, 76, 77, 77, 77, 78,
+        78, 78, 79, 79, 79, 80, 80, 80, 80, 80, 80, 80, 69, 69, 68, 67, 67, 68,
+        69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 77, 78, 78, 79, 79, 79, 79,
+        80, 80, 80, 80, 81, 81, 81, 81, 71, 70, 69, 69, 68, 69, 70, 71, 72, 73,
+        74, 75, 75, 76, 77, 77, 78, 78, 78, 79, 79, 79, 80, 80, 80, 80, 80, 81,
+        81, 81, 81, 81, 71, 71, 70, 70, 69, 70, 71, 72, 73, 73, 74, 75, 76, 76,
+        77, 77, 78, 78, 79, 79, 79, 80, 80, 80, 80, 80, 81, 81, 81, 81, 81, 81,
+        72, 72, 71, 71, 70, 71, 72, 72, 73, 74, 75, 76, 76, 77, 77, 78, 78, 79,
+        79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 81, 81, 81, 81, 73, 73, 72, 72,
+        71, 72, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 79, 79, 79, 80, 80, 80,
+        80, 81, 81, 81, 81, 81, 82, 82, 82, 82, 75, 74, 73, 73, 72, 73, 73, 74,
+        75, 75, 76, 76, 77, 78, 78, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81,
+        81, 82, 82, 82, 82, 82, 75, 75, 74, 73, 73, 73, 74, 75, 75, 76, 76, 77,
+        78, 78, 78, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 81, 82, 82, 82, 82,
+        82, 82, 76, 75, 75, 74, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
+        80, 80, 80, 80, 81, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 77, 76,
+        76, 75, 75, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79, 79, 80, 80, 80, 81,
+        81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 82, 77, 77, 76, 76, 75, 76,
+        76, 77, 77, 77, 78, 78, 79, 79, 79, 80, 80, 80, 81, 81, 81, 81, 81, 82,
+        82, 82, 82, 82, 82, 82, 82, 82, 78, 77, 77, 76, 76, 76, 77, 77, 77, 78,
+        78, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82,
+        82, 82, 82, 82, 78, 78, 77, 77, 77, 77, 77, 77, 78, 78, 79, 79, 79, 80,
+        80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83,
+        79, 78, 78, 78, 77, 77, 78, 78, 78, 79, 79, 79, 80, 80, 80, 80, 81, 81,
+        81, 81, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 79, 79, 79, 78,
+        78, 78, 78, 78, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82,
+        82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 79, 79, 79, 78, 78, 78, 78, 78,
+        79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82,
+        83, 83, 83, 83, 83, 83, 79, 79, 79, 78, 78, 78, 78, 78, 79, 79, 79, 80,
+        80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83,
+        83, 83, 79, 79, 79, 78, 78, 78, 78, 78, 79, 79, 79, 80, 80, 80, 80, 81,
+        81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83 },
+      { /* Intra matrices */
+        /* Size 4 */
+        51, 53, 61, 67, 53, 60, 65, 68, 61, 65, 68, 70, 67, 68, 70, 71,
+        /* Size 8 */
+        55, 49, 51, 56, 61, 64, 67, 69, 49, 52, 51, 54, 58, 62, 65, 67, 51, 51,
+        57, 60, 62, 64, 66, 68, 56, 54, 60, 63, 65, 67, 68, 69, 61, 58, 62, 65,
+        67, 68, 69, 70, 64, 62, 64, 67, 68, 70, 70, 71, 67, 65, 66, 68, 69, 70,
+        71, 71, 69, 67, 68, 69, 70, 71, 71, 72,
+        /* Size 16 */
+        54, 51, 49, 50, 50, 53, 55, 58, 60, 62, 64, 65, 66, 67, 68, 68, 51, 51,
+        50, 50, 51, 52, 54, 57, 59, 61, 63, 64, 65, 66, 67, 67, 49, 50, 51, 51,
+        51, 52, 54, 56, 58, 60, 61, 63, 64, 65, 67, 67, 50, 50, 51, 52, 53, 55,
+        56, 58, 59, 61, 63, 64, 65, 66, 67, 67, 50, 51, 51, 53, 57, 58, 59, 60,
+        61, 63, 64, 65, 66, 67, 67, 67, 53, 52, 52, 55, 58, 59, 61, 62, 63, 64,
+        65, 66, 67, 67, 68, 68, 55, 54, 54, 56, 59, 61, 63, 63, 64, 65, 66, 67,
+        67, 68, 69, 69, 58, 57, 56, 58, 60, 62, 63, 64, 65, 66, 67, 67, 68, 68,
+        69, 69, 60, 59, 58, 59, 61, 63, 64, 65, 66, 67, 68, 68, 69, 69, 69, 69,
+        62, 61, 60, 61, 63, 64, 65, 66, 67, 68, 68, 69, 69, 69, 70, 70, 64, 63,
+        61, 63, 64, 65, 66, 67, 68, 68, 69, 69, 70, 70, 70, 70, 65, 64, 63, 64,
+        65, 66, 67, 67, 68, 69, 69, 70, 70, 70, 70, 70, 66, 65, 64, 65, 66, 67,
+        67, 68, 69, 69, 70, 70, 70, 71, 71, 71, 67, 66, 65, 66, 67, 67, 68, 68,
+        69, 69, 70, 70, 71, 71, 71, 71, 68, 67, 67, 67, 67, 68, 69, 69, 69, 70,
+        70, 70, 71, 71, 71, 71, 68, 67, 67, 67, 67, 68, 69, 69, 69, 70, 70, 70,
+        71, 71, 71, 71,
+        /* Size 32 */
+        54, 52, 51, 50, 49, 49, 49, 50, 50, 51, 53, 54, 55, 56, 57, 58, 60, 61,
+        61, 62, 63, 64, 65, 65, 66, 66, 67, 67, 68, 68, 68, 68, 52, 52, 51, 50,
+        49, 50, 50, 50, 50, 51, 52, 53, 55, 56, 57, 58, 59, 60, 61, 62, 63, 63,
+        64, 65, 65, 66, 66, 67, 67, 67, 67, 67, 51, 51, 51, 50, 50, 50, 50, 50,
+        50, 51, 52, 53, 54, 55, 56, 57, 59, 59, 60, 61, 62, 63, 64, 64, 65, 65,
+        66, 67, 67, 67, 67, 67, 50, 50, 50, 50, 51, 51, 50, 50, 50, 51, 52, 53,
+        54, 55, 56, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65, 65, 66, 66, 67, 67,
+        67, 67, 49, 49, 50, 51, 51, 51, 51, 51, 50, 51, 52, 53, 53, 54, 55, 56,
+        57, 58, 59, 60, 61, 62, 63, 63, 64, 65, 65, 66, 66, 66, 66, 66, 49, 50,
+        50, 51, 51, 51, 51, 52, 52, 52, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61,
+        62, 62, 63, 64, 64, 65, 65, 66, 67, 67, 67, 67, 49, 50, 50, 50, 51, 51,
+        52, 53, 53, 54, 55, 55, 56, 57, 58, 58, 59, 60, 61, 61, 62, 63, 63, 64,
+        65, 65, 66, 66, 67, 67, 67, 67, 50, 50, 50, 50, 51, 52, 53, 54, 55, 55,
+        56, 57, 57, 58, 59, 59, 60, 61, 61, 62, 63, 63, 64, 65, 65, 66, 66, 66,
+        67, 67, 67, 67, 50, 50, 50, 50, 50, 52, 53, 55, 56, 57, 58, 58, 59, 59,
+        60, 60, 61, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67, 67, 67,
+        51, 51, 51, 51, 51, 52, 54, 55, 57, 58, 58, 59, 60, 60, 61, 61, 62, 62,
+        63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67, 67, 67, 67, 53, 52, 52, 52,
+        52, 53, 55, 56, 58, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 65, 65,
+        65, 66, 66, 67, 67, 67, 68, 68, 68, 68, 54, 53, 53, 53, 53, 54, 55, 57,
+        58, 59, 60, 61, 61, 62, 62, 63, 63, 64, 64, 65, 65, 65, 66, 66, 67, 67,
+        67, 68, 68, 68, 68, 68, 55, 55, 54, 54, 53, 55, 56, 57, 59, 60, 60, 61,
+        62, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68,
+        68, 68, 56, 56, 55, 55, 54, 55, 57, 58, 59, 60, 61, 62, 63, 63, 64, 64,
+        65, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 57, 57,
+        56, 56, 55, 56, 58, 59, 60, 61, 62, 62, 63, 64, 64, 65, 65, 65, 66, 66,
+        67, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 58, 58, 57, 57, 56, 57,
+        58, 59, 60, 61, 62, 63, 64, 64, 65, 65, 66, 66, 66, 67, 67, 67, 67, 68,
+        68, 68, 68, 69, 69, 69, 69, 69, 60, 59, 59, 58, 57, 58, 59, 60, 61, 62,
+        63, 63, 64, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69,
+        69, 69, 69, 69, 61, 60, 59, 59, 58, 59, 60, 61, 62, 62, 63, 64, 65, 65,
+        65, 66, 66, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+        61, 61, 60, 60, 59, 60, 61, 61, 62, 63, 64, 64, 65, 65, 66, 66, 67, 67,
+        67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 62, 62, 61, 61,
+        60, 61, 61, 62, 63, 63, 64, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 68,
+        69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 63, 63, 62, 62, 61, 62, 62, 63,
+        63, 64, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69,
+        70, 70, 70, 70, 70, 70, 64, 63, 63, 62, 62, 62, 63, 63, 64, 64, 65, 65,
+        66, 66, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70,
+        70, 70, 65, 64, 64, 63, 63, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67,
+        68, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 65, 65,
+        64, 64, 63, 64, 64, 65, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69,
+        69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 66, 65, 65, 65, 64, 64,
+        65, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 70, 70,
+        70, 70, 70, 70, 70, 70, 70, 70, 66, 66, 65, 65, 65, 65, 65, 66, 66, 66,
+        67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70,
+        71, 71, 71, 71, 67, 66, 66, 66, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68,
+        68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71,
+        67, 67, 67, 66, 66, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69,
+        69, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 68, 67, 67, 67,
+        66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70,
+        70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 68, 67, 67, 67, 66, 67, 67, 67,
+        67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 71,
+        71, 71, 71, 71, 71, 71, 68, 67, 67, 67, 66, 67, 67, 67, 67, 67, 68, 68,
+        68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71,
+        71, 71, 68, 67, 67, 67, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 69, 69,
+        69, 69, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 74, 75, 79, 74, 77, 78, 80, 75, 78, 82, 84, 79, 80, 84, 86,
+        /* Size 8 */
+        64, 59, 72, 73, 74, 76, 78, 80, 59, 67, 72, 70, 71, 72, 75, 77, 72, 72,
+        75, 74, 74, 75, 76, 78, 73, 70, 74, 76, 77, 78, 79, 80, 74, 71, 74, 77,
+        79, 80, 80, 81, 76, 72, 75, 78, 80, 81, 82, 82, 78, 75, 76, 79, 80, 82,
+        83, 83, 80, 77, 78, 80, 81, 82, 83, 84,
+        /* Size 16 */
+        64, 62, 59, 65, 72, 72, 73, 73, 74, 75, 76, 77, 78, 79, 80, 80, 62, 62,
+        63, 67, 72, 72, 71, 72, 72, 73, 74, 75, 76, 77, 78, 78, 59, 63, 67, 70,
+        72, 71, 70, 70, 71, 72, 72, 74, 75, 76, 77, 77, 65, 67, 70, 71, 73, 73,
+        72, 72, 72, 73, 74, 75, 75, 76, 77, 77, 72, 72, 72, 73, 75, 74, 74, 74,
+        74, 75, 75, 76, 76, 77, 78, 78, 72, 72, 71, 73, 74, 75, 75, 75, 76, 76,
+        76, 77, 77, 78, 79, 79, 73, 71, 70, 72, 74, 75, 76, 77, 77, 77, 78, 78,
+        79, 79, 80, 80, 73, 72, 70, 72, 74, 75, 77, 77, 78, 78, 79, 79, 79, 80,
+        80, 80, 74, 72, 71, 72, 74, 76, 77, 78, 79, 79, 80, 80, 80, 81, 81, 81,
+        75, 73, 72, 73, 75, 76, 77, 78, 79, 80, 80, 81, 81, 81, 82, 82, 76, 74,
+        72, 74, 75, 76, 78, 79, 80, 80, 81, 81, 82, 82, 82, 82, 77, 75, 74, 75,
+        76, 77, 78, 79, 80, 81, 81, 82, 82, 83, 83, 83, 78, 76, 75, 75, 76, 77,
+        79, 79, 80, 81, 82, 82, 83, 83, 83, 83, 79, 77, 76, 76, 77, 78, 79, 80,
+        81, 81, 82, 83, 83, 83, 84, 84, 80, 78, 77, 77, 78, 79, 80, 80, 81, 82,
+        82, 83, 83, 84, 84, 84, 80, 78, 77, 77, 78, 79, 80, 80, 81, 82, 82, 83,
+        83, 84, 84, 84,
+        /* Size 32 */
+        64, 63, 62, 60, 59, 62, 65, 68, 72, 72, 72, 72, 73, 73, 73, 74, 74, 74,
+        75, 75, 76, 76, 77, 77, 78, 78, 79, 79, 80, 80, 80, 80, 63, 62, 62, 61,
+        61, 63, 66, 69, 72, 72, 72, 72, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75,
+        76, 76, 77, 77, 78, 78, 79, 79, 79, 79, 62, 62, 62, 63, 63, 65, 67, 69,
+        72, 72, 72, 72, 71, 72, 72, 72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77,
+        77, 78, 78, 78, 78, 78, 60, 61, 63, 64, 65, 67, 68, 70, 72, 72, 71, 71,
+        71, 71, 71, 71, 71, 72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 77,
+        77, 77, 59, 61, 63, 65, 67, 68, 70, 71, 72, 72, 71, 71, 70, 70, 70, 71,
+        71, 71, 72, 72, 72, 73, 74, 74, 75, 75, 76, 76, 77, 77, 77, 77, 62, 63,
+        65, 67, 68, 69, 70, 72, 73, 72, 72, 72, 71, 71, 71, 71, 72, 72, 72, 73,
+        73, 74, 74, 75, 75, 75, 76, 76, 77, 77, 77, 77, 65, 66, 67, 68, 70, 70,
+        71, 72, 73, 73, 73, 72, 72, 72, 72, 72, 72, 73, 73, 73, 74, 74, 75, 75,
+        75, 76, 76, 77, 77, 77, 77, 77, 68, 69, 69, 70, 71, 72, 72, 73, 74, 74,
+        74, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 76, 76, 76, 77, 77,
+        78, 78, 78, 78, 72, 72, 72, 72, 72, 73, 73, 74, 75, 75, 74, 74, 74, 74,
+        74, 74, 74, 74, 75, 75, 75, 75, 76, 76, 76, 77, 77, 78, 78, 78, 78, 78,
+        72, 72, 72, 72, 72, 72, 73, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75,
+        75, 75, 76, 76, 76, 77, 77, 77, 78, 78, 78, 78, 78, 78, 72, 72, 72, 71,
+        71, 72, 73, 74, 74, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 77,
+        77, 77, 77, 78, 78, 78, 79, 79, 79, 79, 72, 72, 72, 71, 71, 72, 72, 73,
+        74, 75, 75, 75, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 78, 78, 78,
+        79, 79, 79, 79, 79, 79, 73, 72, 71, 71, 70, 71, 72, 73, 74, 75, 75, 76,
+        76, 76, 77, 77, 77, 77, 77, 78, 78, 78, 78, 78, 79, 79, 79, 79, 80, 80,
+        80, 80, 73, 72, 72, 71, 70, 71, 72, 73, 74, 75, 75, 76, 76, 77, 77, 77,
+        77, 78, 78, 78, 78, 78, 79, 79, 79, 79, 80, 80, 80, 80, 80, 80, 73, 73,
+        72, 71, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 77, 78, 78, 78, 78, 78,
+        79, 79, 79, 79, 79, 80, 80, 80, 80, 80, 80, 80, 74, 73, 72, 71, 71, 71,
+        72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, 80, 80,
+        80, 80, 80, 81, 81, 81, 81, 81, 74, 73, 72, 71, 71, 72, 72, 73, 74, 75,
+        76, 76, 77, 77, 78, 78, 79, 79, 79, 79, 80, 80, 80, 80, 80, 81, 81, 81,
+        81, 81, 81, 81, 74, 74, 73, 72, 71, 72, 73, 74, 74, 75, 76, 76, 77, 78,
+        78, 78, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 81, 82, 82, 82, 82,
+        75, 74, 73, 72, 72, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 79, 79, 79,
+        80, 80, 80, 80, 81, 81, 81, 81, 81, 82, 82, 82, 82, 82, 75, 74, 74, 73,
+        72, 73, 73, 74, 75, 75, 76, 77, 78, 78, 78, 79, 79, 80, 80, 80, 81, 81,
+        81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 76, 75, 74, 73, 72, 73, 74, 74,
+        75, 76, 76, 77, 78, 78, 79, 79, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82,
+        82, 82, 82, 82, 82, 82, 76, 75, 75, 74, 73, 74, 74, 75, 75, 76, 77, 77,
+        78, 78, 79, 79, 80, 80, 80, 81, 81, 81, 82, 82, 82, 82, 82, 83, 83, 83,
+        83, 83, 77, 76, 75, 74, 74, 74, 75, 75, 76, 76, 77, 77, 78, 79, 79, 80,
+        80, 80, 81, 81, 81, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 77, 76,
+        76, 75, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79, 80, 80, 81, 81, 81,
+        82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 83, 83, 78, 77, 76, 75, 75, 75,
+        75, 76, 76, 77, 77, 78, 79, 79, 79, 80, 80, 81, 81, 81, 82, 82, 82, 83,
+        83, 83, 83, 83, 83, 83, 83, 83, 78, 77, 77, 76, 75, 75, 76, 76, 77, 77,
+        78, 78, 79, 79, 80, 80, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, 83,
+        84, 84, 84, 84, 79, 78, 77, 76, 76, 76, 76, 77, 77, 78, 78, 79, 79, 80,
+        80, 80, 81, 81, 81, 82, 82, 82, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84,
+        79, 78, 78, 77, 76, 76, 77, 77, 78, 78, 78, 79, 79, 80, 80, 81, 81, 81,
+        82, 82, 82, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 80, 79, 78, 77,
+        77, 77, 77, 78, 78, 78, 79, 79, 80, 80, 80, 81, 81, 82, 82, 82, 82, 83,
+        83, 83, 83, 84, 84, 84, 84, 84, 84, 84, 80, 79, 78, 77, 77, 77, 77, 78,
+        78, 78, 79, 79, 80, 80, 80, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, 84,
+        84, 84, 84, 84, 84, 84, 80, 79, 78, 77, 77, 77, 77, 78, 78, 78, 79, 79,
+        80, 80, 80, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, 84, 84, 84, 84, 84,
+        84, 84, 80, 79, 78, 77, 77, 77, 77, 78, 78, 78, 79, 79, 80, 80, 80, 81,
+        81, 82, 82, 82, 82, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 84 },
+      { /* Intra matrices */
+        /* Size 4 */
+        52, 60, 61, 65, 60, 63, 64, 66, 61, 64, 67, 69, 65, 66, 69, 71,
+        /* Size 8 */
+        53, 49, 60, 61, 62, 64, 65, 67, 49, 56, 60, 59, 59, 61, 63, 64, 60, 60,
+        63, 62, 62, 63, 64, 66, 61, 59, 62, 64, 65, 65, 66, 67, 62, 59, 62, 65,
+        66, 67, 68, 69, 64, 61, 63, 65, 67, 68, 69, 70, 65, 63, 64, 66, 68, 69,
+        70, 71, 67, 64, 66, 67, 69, 70, 71, 71,
+        /* Size 16 */
+        53, 51, 49, 54, 60, 60, 61, 61, 62, 62, 63, 64, 65, 66, 67, 67, 51, 51,
+        52, 56, 60, 60, 59, 60, 60, 61, 62, 63, 64, 64, 65, 65, 49, 52, 56, 58,
+        60, 59, 58, 58, 59, 59, 60, 61, 62, 63, 64, 64, 54, 56, 58, 59, 61, 61,
+        60, 60, 60, 61, 61, 62, 63, 64, 65, 65, 60, 60, 60, 61, 62, 62, 62, 62,
+        62, 62, 63, 63, 64, 64, 65, 65, 60, 60, 59, 61, 62, 62, 63, 63, 63, 63,
+        64, 64, 65, 65, 66, 66, 61, 59, 58, 60, 62, 63, 64, 64, 64, 65, 65, 65,
+        66, 66, 67, 67, 61, 60, 58, 60, 62, 63, 64, 64, 65, 65, 66, 66, 67, 67,
+        67, 67, 62, 60, 59, 60, 62, 63, 64, 65, 66, 66, 67, 67, 67, 68, 68, 68,
+        62, 61, 59, 61, 62, 63, 65, 65, 66, 67, 67, 68, 68, 68, 69, 69, 63, 62,
+        60, 61, 63, 64, 65, 66, 67, 67, 68, 68, 69, 69, 69, 69, 64, 63, 61, 62,
+        63, 64, 65, 66, 67, 68, 68, 69, 69, 69, 70, 70, 65, 64, 62, 63, 64, 65,
+        66, 67, 67, 68, 69, 69, 69, 70, 70, 70, 66, 64, 63, 64, 64, 65, 66, 67,
+        68, 68, 69, 69, 70, 70, 70, 70, 67, 65, 64, 65, 65, 66, 67, 67, 68, 69,
+        69, 70, 70, 70, 71, 71, 67, 65, 64, 65, 65, 66, 67, 67, 68, 69, 69, 70,
+        70, 70, 71, 71,
+        /* Size 32 */
+        53, 52, 51, 50, 49, 51, 53, 56, 59, 60, 60, 60, 60, 61, 61, 61, 61, 62,
+        62, 63, 63, 63, 64, 64, 65, 65, 66, 66, 66, 66, 66, 66, 52, 51, 51, 50,
+        50, 52, 54, 57, 59, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 62, 62, 63,
+        63, 64, 64, 64, 65, 65, 66, 66, 66, 66, 51, 51, 51, 51, 52, 54, 55, 57,
+        60, 59, 59, 59, 59, 59, 60, 60, 60, 60, 61, 61, 62, 62, 62, 63, 63, 64,
+        64, 65, 65, 65, 65, 65, 50, 50, 51, 52, 54, 55, 56, 58, 60, 59, 59, 59,
+        59, 59, 59, 59, 59, 60, 60, 60, 61, 61, 62, 62, 63, 63, 63, 64, 64, 64,
+        64, 64, 49, 50, 52, 54, 55, 56, 58, 59, 60, 59, 59, 58, 58, 58, 58, 58,
+        59, 59, 59, 60, 60, 61, 61, 61, 62, 62, 63, 63, 64, 64, 64, 64, 51, 52,
+        54, 55, 56, 57, 58, 59, 60, 60, 60, 59, 59, 59, 59, 59, 59, 60, 60, 60,
+        61, 61, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64, 53, 54, 55, 56, 58, 58,
+        59, 60, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 62, 62, 62,
+        63, 63, 64, 64, 64, 64, 64, 64, 56, 57, 57, 58, 59, 59, 60, 61, 61, 61,
+        61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64,
+        65, 65, 65, 65, 59, 59, 60, 60, 60, 60, 61, 61, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 65,
+        60, 60, 59, 59, 59, 60, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+        63, 63, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 65, 65, 60, 60, 59, 59,
+        59, 60, 60, 61, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 64,
+        64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 60, 60, 59, 59, 58, 59, 60, 61,
+        62, 62, 62, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+        65, 66, 66, 66, 66, 66, 60, 60, 59, 59, 58, 59, 60, 61, 62, 62, 62, 63,
+        63, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66,
+        66, 66, 61, 60, 59, 59, 58, 59, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64,
+        64, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 61, 60,
+        60, 59, 58, 59, 60, 61, 62, 62, 63, 63, 64, 64, 64, 65, 65, 65, 65, 65,
+        66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 61, 60, 60, 59, 58, 59,
+        60, 61, 62, 62, 63, 63, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 67,
+        67, 67, 67, 67, 67, 67, 67, 67, 61, 61, 60, 59, 59, 59, 60, 61, 62, 62,
+        63, 63, 64, 64, 65, 65, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68,
+        68, 68, 68, 68, 62, 61, 60, 60, 59, 60, 60, 61, 62, 62, 63, 64, 64, 65,
+        65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+        62, 61, 61, 60, 59, 60, 61, 61, 62, 63, 63, 64, 64, 65, 65, 66, 66, 66,
+        66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 63, 62, 61, 60,
+        60, 60, 61, 61, 62, 63, 63, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 67,
+        68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 63, 62, 62, 61, 60, 61, 61, 62,
+        62, 63, 63, 64, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 68, 68,
+        69, 69, 69, 69, 69, 69, 63, 63, 62, 61, 61, 61, 62, 62, 63, 63, 64, 64,
+        65, 65, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69,
+        69, 69, 64, 63, 62, 62, 61, 61, 62, 62, 63, 63, 64, 65, 65, 65, 66, 66,
+        67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 64, 64,
+        63, 62, 61, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67, 68, 68,
+        68, 68, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 65, 64, 63, 63, 62, 62,
+        63, 63, 64, 64, 64, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69,
+        69, 69, 70, 70, 70, 70, 70, 70, 65, 64, 64, 63, 62, 63, 63, 63, 64, 64,
+        65, 65, 66, 66, 66, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70,
+        70, 70, 70, 70, 66, 65, 64, 63, 63, 63, 64, 64, 64, 65, 65, 65, 66, 66,
+        67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70,
+        66, 65, 65, 64, 63, 64, 64, 64, 65, 65, 65, 66, 66, 67, 67, 67, 68, 68,
+        68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 66, 66, 65, 64,
+        64, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69,
+        69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 66, 66, 65, 64, 64, 64, 64, 65,
+        65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70,
+        70, 70, 70, 70, 70, 70, 66, 66, 65, 64, 64, 64, 64, 65, 65, 65, 66, 66,
+        66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70,
+        70, 70, 66, 66, 65, 64, 64, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67,
+        68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 65, 72, 76, 65, 71, 74, 76, 72, 74, 77, 78, 76, 76, 78, 79,
+        /* Size 8 */
+        64, 60, 61, 65, 68, 71, 73, 74, 60, 62, 61, 64, 67, 69, 71, 73, 61, 61,
+        66, 68, 69, 71, 72, 73, 65, 64, 68, 70, 71, 72, 73, 74, 68, 67, 69, 71,
+        73, 74, 74, 75, 71, 69, 71, 72, 74, 74, 75, 75, 73, 71, 72, 73, 74, 75,
+        75, 76, 74, 73, 73, 74, 75, 75, 76, 76,
+        /* Size 16 */
+        64, 62, 60, 60, 61, 63, 65, 67, 68, 70, 71, 72, 73, 73, 74, 74, 62, 61,
+        61, 61, 61, 63, 64, 66, 68, 69, 70, 71, 72, 73, 73, 73, 60, 61, 62, 62,
+        61, 62, 64, 65, 67, 68, 69, 70, 71, 72, 73, 73, 60, 61, 62, 63, 64, 65,
+        66, 67, 68, 69, 70, 71, 72, 73, 73, 73, 61, 61, 61, 64, 66, 67, 68, 68,
+        69, 70, 71, 72, 72, 73, 73, 73, 63, 63, 62, 65, 67, 68, 69, 70, 70, 71,
+        72, 72, 73, 73, 74, 74, 65, 64, 64, 66, 68, 69, 70, 71, 71, 72, 72, 73,
+        73, 74, 74, 74, 67, 66, 65, 67, 68, 70, 71, 71, 72, 73, 73, 73, 74, 74,
+        74, 74, 68, 68, 67, 68, 69, 70, 71, 72, 73, 73, 74, 74, 74, 74, 75, 75,
+        70, 69, 68, 69, 70, 71, 72, 73, 73, 74, 74, 74, 75, 75, 75, 75, 71, 70,
+        69, 70, 71, 72, 72, 73, 74, 74, 74, 75, 75, 75, 75, 75, 72, 71, 70, 71,
+        72, 72, 73, 73, 74, 74, 75, 75, 75, 75, 75, 75, 73, 72, 71, 72, 72, 73,
+        73, 74, 74, 75, 75, 75, 75, 75, 76, 76, 73, 73, 72, 73, 73, 73, 74, 74,
+        74, 75, 75, 75, 75, 76, 76, 76, 74, 73, 73, 73, 73, 74, 74, 74, 75, 75,
+        75, 75, 76, 76, 76, 76, 74, 73, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75,
+        76, 76, 76, 76,
+        /* Size 32 */
+        64, 63, 62, 61, 60, 60, 60, 61, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69,
+        70, 70, 71, 71, 72, 72, 73, 73, 73, 74, 74, 74, 74, 74, 63, 62, 62, 61,
+        60, 61, 61, 61, 61, 62, 63, 64, 65, 65, 66, 67, 68, 69, 69, 70, 70, 71,
+        71, 72, 72, 73, 73, 73, 74, 74, 74, 74, 62, 62, 61, 61, 61, 61, 61, 61,
+        61, 62, 63, 63, 64, 65, 66, 67, 68, 68, 69, 69, 70, 71, 71, 72, 72, 72,
+        73, 73, 73, 73, 73, 73, 61, 61, 61, 61, 61, 61, 61, 61, 61, 62, 63, 63,
+        64, 65, 65, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 73,
+        73, 73, 60, 60, 61, 61, 62, 62, 62, 61, 61, 62, 62, 63, 64, 64, 65, 66,
+        67, 67, 68, 69, 69, 70, 70, 71, 71, 72, 72, 73, 73, 73, 73, 73, 60, 61,
+        61, 61, 62, 62, 62, 62, 62, 63, 63, 64, 65, 65, 66, 67, 67, 68, 69, 69,
+        70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 73, 73, 60, 61, 61, 61, 62, 62,
+        63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 69, 69, 70, 70, 71, 71, 71,
+        72, 72, 73, 73, 73, 73, 73, 73, 61, 61, 61, 61, 61, 62, 63, 64, 65, 65,
+        66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73,
+        73, 73, 73, 73, 61, 61, 61, 61, 61, 62, 64, 65, 66, 66, 67, 67, 68, 68,
+        68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73,
+        62, 62, 62, 62, 62, 63, 64, 65, 66, 67, 67, 68, 68, 69, 69, 69, 70, 70,
+        71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 63, 63, 63, 63,
+        62, 63, 65, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72,
+        72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 64, 64, 63, 63, 63, 64, 65, 66,
+        67, 68, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73,
+        74, 74, 74, 74, 74, 74, 65, 65, 64, 64, 64, 65, 66, 67, 68, 68, 69, 69,
+        70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74,
+        74, 74, 66, 65, 65, 65, 64, 65, 66, 67, 68, 69, 69, 70, 70, 71, 71, 71,
+        72, 72, 72, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 67, 66,
+        66, 65, 65, 66, 67, 68, 68, 69, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73,
+        73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 74, 67, 67, 67, 66, 66, 67,
+        67, 68, 69, 69, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74,
+        74, 74, 74, 74, 75, 75, 75, 75, 68, 68, 68, 67, 67, 67, 68, 69, 69, 70,
+        70, 71, 71, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 75,
+        75, 75, 75, 75, 69, 69, 68, 68, 67, 68, 69, 69, 70, 70, 71, 71, 72, 72,
+        72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75,
+        70, 69, 69, 68, 68, 69, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 73,
+        74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 70, 70, 69, 69,
+        69, 69, 70, 70, 71, 71, 71, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74,
+        74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 71, 70, 70, 70, 69, 70, 70, 71,
+        71, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75,
+        75, 75, 75, 75, 75, 75, 71, 71, 71, 70, 70, 70, 71, 71, 71, 72, 72, 72,
+        73, 73, 73, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75,
+        75, 75, 72, 71, 71, 71, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 74,
+        74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 72, 72,
+        72, 71, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 75,
+        75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 73, 72, 72, 72, 71, 72,
+        72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75,
+        75, 75, 75, 75, 76, 76, 76, 76, 73, 73, 72, 72, 72, 72, 72, 72, 73, 73,
+        73, 73, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76,
+        76, 76, 76, 76, 73, 73, 73, 72, 72, 72, 73, 73, 73, 73, 73, 74, 74, 74,
+        74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76,
+        74, 73, 73, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 75, 75,
+        75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 74, 74, 73, 73,
+        73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75,
+        75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 74, 74, 73, 73, 73, 73, 73, 73,
+        73, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76,
+        76, 76, 76, 76, 76, 76, 74, 74, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74,
+        74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76,
+        76, 76, 74, 74, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 75,
+        75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76 },
+      { /* Intra matrices */
+        /* Size 4 */
+        55, 57, 62, 66, 57, 61, 65, 67, 62, 65, 67, 68, 66, 67, 68, 69,
+        /* Size 8 */
+        58, 54, 55, 59, 62, 64, 66, 67, 54, 56, 55, 57, 60, 63, 65, 66, 55, 55,
+        60, 61, 63, 64, 66, 67, 59, 57, 61, 64, 65, 66, 67, 68, 62, 60, 63, 65,
+        66, 67, 68, 68, 64, 63, 64, 66, 67, 68, 68, 69, 66, 65, 66, 67, 68, 68,
+        69, 69, 67, 66, 67, 68, 68, 69, 69, 69,
+        /* Size 16 */
+        57, 55, 53, 54, 55, 56, 58, 60, 62, 63, 64, 65, 66, 66, 67, 67, 55, 55,
+        54, 55, 55, 56, 58, 59, 61, 62, 63, 64, 65, 66, 66, 66, 53, 54, 55, 55,
+        55, 56, 57, 58, 60, 61, 63, 63, 64, 65, 66, 66, 54, 55, 55, 56, 57, 58,
+        59, 60, 61, 62, 63, 64, 65, 66, 66, 66, 55, 55, 55, 57, 59, 60, 61, 62,
+        62, 63, 64, 65, 65, 66, 66, 66, 56, 56, 56, 58, 60, 61, 62, 63, 63, 64,
+        65, 65, 66, 66, 67, 67, 58, 58, 57, 59, 61, 62, 63, 64, 65, 65, 66, 66,
+        66, 67, 67, 67, 60, 59, 58, 60, 62, 63, 64, 65, 65, 66, 66, 66, 67, 67,
+        67, 67, 62, 61, 60, 61, 62, 63, 65, 65, 66, 66, 67, 67, 67, 67, 68, 68,
+        63, 62, 61, 62, 63, 64, 65, 66, 66, 67, 67, 67, 67, 68, 68, 68, 64, 63,
+        63, 63, 64, 65, 66, 66, 67, 67, 67, 68, 68, 68, 68, 68, 65, 64, 63, 64,
+        65, 65, 66, 66, 67, 67, 68, 68, 68, 68, 68, 68, 66, 65, 64, 65, 65, 66,
+        66, 67, 67, 67, 68, 68, 68, 68, 68, 68, 66, 66, 65, 66, 66, 66, 67, 67,
+        67, 68, 68, 68, 68, 68, 69, 69, 67, 66, 66, 66, 66, 67, 67, 67, 68, 68,
+        68, 68, 68, 69, 69, 69, 67, 66, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68,
+        68, 69, 69, 69,
+        /* Size 32 */
+        57, 56, 55, 54, 53, 54, 54, 54, 54, 55, 56, 57, 58, 59, 60, 60, 61, 62,
+        63, 63, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 67, 56, 56, 55, 54,
+        54, 54, 54, 54, 54, 55, 56, 57, 58, 58, 59, 60, 61, 62, 62, 63, 63, 64,
+        64, 65, 65, 65, 66, 66, 66, 66, 66, 66, 55, 55, 55, 54, 54, 54, 54, 54,
+        55, 55, 56, 57, 57, 58, 59, 60, 61, 61, 62, 62, 63, 63, 64, 64, 65, 65,
+        65, 66, 66, 66, 66, 66, 54, 54, 54, 55, 55, 55, 55, 55, 55, 55, 56, 56,
+        57, 58, 59, 59, 60, 61, 61, 62, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66,
+        66, 66, 53, 54, 54, 55, 55, 55, 55, 55, 55, 55, 56, 56, 57, 57, 58, 59,
+        60, 60, 61, 62, 62, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66, 66, 54, 54,
+        54, 55, 55, 55, 55, 56, 56, 56, 57, 57, 58, 58, 59, 60, 60, 61, 61, 62,
+        63, 63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 54, 54, 54, 55, 55, 55,
+        56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 63, 63, 63, 64, 64,
+        65, 65, 65, 66, 66, 66, 66, 66, 54, 54, 54, 55, 55, 56, 56, 57, 58, 58,
+        59, 59, 60, 60, 61, 61, 62, 62, 62, 63, 63, 64, 64, 65, 65, 65, 65, 66,
+        66, 66, 66, 66, 54, 54, 55, 55, 55, 56, 57, 58, 59, 59, 60, 60, 61, 61,
+        61, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 66,
+        55, 55, 55, 55, 55, 56, 57, 58, 59, 60, 60, 61, 61, 62, 62, 62, 63, 63,
+        63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 56, 56, 56, 56,
+        56, 57, 58, 59, 60, 60, 61, 61, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65,
+        65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 57, 57, 57, 56, 56, 57, 58, 59,
+        60, 61, 61, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66,
+        66, 66, 67, 67, 67, 67, 58, 58, 57, 57, 57, 58, 59, 60, 61, 61, 62, 62,
+        63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 67, 67, 67,
+        67, 67, 59, 58, 58, 58, 57, 58, 59, 60, 61, 62, 62, 63, 63, 64, 64, 64,
+        65, 65, 65, 65, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 60, 59,
+        59, 59, 58, 59, 60, 61, 61, 62, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66,
+        66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 60, 60, 60, 59, 59, 60,
+        60, 61, 62, 62, 63, 63, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 67,
+        67, 67, 67, 67, 67, 67, 67, 67, 61, 61, 61, 60, 60, 60, 61, 62, 62, 63,
+        63, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67,
+        67, 67, 67, 67, 62, 62, 61, 61, 60, 61, 61, 62, 63, 63, 64, 64, 65, 65,
+        65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68,
+        63, 62, 62, 61, 61, 61, 62, 62, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66,
+        66, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 63, 63, 62, 62,
+        62, 62, 63, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67,
+        67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 64, 63, 63, 63, 62, 63, 63, 63,
+        64, 64, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 68, 68,
+        68, 68, 68, 68, 68, 68, 64, 64, 63, 63, 63, 63, 63, 64, 64, 64, 65, 65,
+        66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 65, 64, 64, 64, 63, 64, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66,
+        67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 65, 65,
+        64, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67,
+        67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 65, 65, 65, 65, 64, 64,
+        65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68,
+        68, 68, 68, 68, 68, 68, 68, 68, 66, 65, 65, 65, 65, 65, 65, 65, 65, 66,
+        66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 68, 68, 66, 66, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 67,
+        67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+        66, 66, 66, 66, 65, 65, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67,
+        68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 67, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68,
+        68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 67, 66, 66, 66, 66, 66, 66, 66,
+        66, 66, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 68, 68, 68, 68, 67, 66, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67,
+        67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 67, 66, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67,
+        67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 71, 72, 74, 71, 73, 73, 75, 72, 73, 76, 77, 74, 75, 77, 78,
+        /* Size 8 */
+        64, 61, 69, 70, 71, 72, 73, 74, 61, 66, 69, 68, 69, 70, 71, 72, 69, 69,
+        71, 71, 71, 71, 72, 73, 70, 68, 71, 72, 73, 73, 74, 74, 71, 69, 71, 73,
+        74, 74, 75, 75, 72, 70, 71, 73, 74, 75, 75, 76, 73, 71, 72, 74, 75, 75,
+        76, 76, 74, 72, 73, 74, 75, 76, 76, 77,
+        /* Size 16 */
+        64, 62, 61, 65, 69, 70, 70, 70, 71, 71, 72, 72, 73, 74, 74, 74, 62, 63,
+        63, 66, 69, 69, 69, 69, 70, 70, 71, 71, 72, 73, 73, 73, 61, 63, 66, 68,
+        69, 69, 68, 68, 69, 69, 70, 70, 71, 72, 72, 72, 65, 66, 68, 69, 70, 70,
+        69, 70, 70, 70, 70, 71, 72, 72, 73, 73, 69, 69, 69, 70, 71, 71, 71, 71,
+        71, 71, 71, 72, 72, 73, 73, 73, 70, 69, 69, 70, 71, 71, 71, 72, 72, 72,
+        72, 72, 73, 73, 74, 74, 70, 69, 68, 69, 71, 71, 72, 72, 73, 73, 73, 73,
+        74, 74, 74, 74, 70, 69, 68, 70, 71, 72, 72, 73, 73, 73, 74, 74, 74, 74,
+        75, 75, 71, 70, 69, 70, 71, 72, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75,
+        71, 70, 69, 70, 71, 72, 73, 73, 74, 74, 75, 75, 75, 75, 76, 76, 72, 71,
+        70, 70, 71, 72, 73, 74, 74, 75, 75, 75, 75, 76, 76, 76, 72, 71, 70, 71,
+        72, 72, 73, 74, 74, 75, 75, 75, 76, 76, 76, 76, 73, 72, 71, 72, 72, 73,
+        74, 74, 75, 75, 75, 76, 76, 76, 76, 76, 74, 73, 72, 72, 73, 73, 74, 74,
+        75, 75, 76, 76, 76, 76, 77, 77, 74, 73, 72, 73, 73, 74, 74, 75, 75, 76,
+        76, 76, 76, 77, 77, 77, 74, 73, 72, 73, 73, 74, 74, 75, 75, 76, 76, 76,
+        76, 77, 77, 77,
+        /* Size 32 */
+        64, 63, 62, 61, 61, 63, 65, 67, 69, 69, 70, 70, 70, 70, 70, 70, 71, 71,
+        71, 72, 72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 74, 74, 63, 63, 62, 62,
+        62, 64, 65, 67, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 71, 71, 71, 72,
+        72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 62, 62, 63, 63, 63, 65, 66, 68,
+        69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 72, 72, 72,
+        73, 73, 73, 73, 73, 73, 61, 62, 63, 64, 65, 66, 67, 68, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73,
+        73, 73, 61, 62, 63, 65, 66, 67, 68, 69, 69, 69, 69, 68, 68, 68, 68, 68,
+        69, 69, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 72, 72, 63, 64,
+        65, 66, 67, 68, 68, 69, 70, 70, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70,
+        70, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 65, 65, 66, 67, 68, 68,
+        69, 70, 70, 70, 70, 70, 69, 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71,
+        72, 72, 72, 72, 73, 73, 73, 73, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71,
+        70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 72, 72, 72, 72, 73,
+        73, 73, 73, 73, 69, 69, 69, 69, 69, 70, 70, 71, 71, 71, 71, 71, 71, 71,
+        71, 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73,
+        69, 69, 69, 69, 69, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
+        71, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 70, 69, 69, 69,
+        69, 69, 70, 70, 71, 71, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+        72, 73, 73, 73, 73, 73, 74, 74, 74, 74, 70, 69, 69, 69, 68, 69, 70, 70,
+        71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73,
+        74, 74, 74, 74, 74, 74, 70, 69, 69, 69, 68, 69, 69, 70, 71, 71, 71, 72,
+        72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74,
+        74, 74, 70, 70, 69, 69, 68, 69, 70, 70, 71, 71, 72, 72, 72, 72, 73, 73,
+        73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 70, 70,
+        69, 69, 68, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73,
+        74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 70, 70, 69, 69, 68, 69,
+        70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74,
+        74, 74, 75, 75, 75, 75, 75, 75, 71, 70, 70, 69, 69, 69, 70, 70, 71, 71,
+        72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75,
+        75, 75, 75, 75, 71, 70, 70, 69, 69, 69, 70, 70, 71, 71, 72, 72, 73, 73,
+        73, 73, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75,
+        71, 71, 70, 70, 69, 70, 70, 71, 71, 71, 72, 72, 73, 73, 73, 74, 74, 74,
+        74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 72, 71, 70, 70,
+        69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75,
+        75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 72, 71, 71, 70, 70, 70, 70, 71,
+        71, 72, 72, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 76,
+        76, 76, 76, 76, 76, 76, 72, 72, 71, 71, 70, 70, 71, 71, 72, 72, 72, 73,
+        73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76,
+        76, 76, 72, 72, 71, 71, 70, 71, 71, 71, 72, 72, 72, 73, 73, 74, 74, 74,
+        74, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 76, 73, 72,
+        72, 71, 71, 71, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75,
+        75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 73, 73, 72, 72, 71, 71,
+        72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 75, 76, 76, 76,
+        76, 76, 76, 76, 76, 76, 76, 76, 73, 73, 72, 72, 71, 72, 72, 72, 72, 73,
+        73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76,
+        77, 77, 77, 77, 74, 73, 73, 72, 72, 72, 72, 72, 73, 73, 73, 74, 74, 74,
+        74, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77,
+        74, 73, 73, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75,
+        75, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 74, 74, 73, 73,
+        72, 73, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 76, 76, 76, 76,
+        76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 74, 74, 73, 73, 72, 73, 73, 73,
+        73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 77,
+        77, 77, 77, 77, 77, 77, 74, 74, 73, 73, 72, 73, 73, 73, 73, 73, 74, 74,
+        74, 74, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77,
+        77, 77, 74, 74, 73, 73, 72, 73, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75,
+        75, 75, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77 },
+      { /* Intra matrices */
+        /* Size 4 */
+        55, 61, 62, 65, 61, 63, 64, 65, 62, 64, 66, 67, 65, 65, 67, 68,
+        /* Size 8 */
+        57, 53, 61, 62, 63, 64, 65, 66, 53, 59, 62, 60, 61, 62, 63, 64, 61, 62,
+        63, 63, 63, 63, 64, 65, 62, 60, 63, 64, 65, 65, 66, 66, 63, 61, 63, 65,
+        66, 66, 67, 67, 64, 62, 63, 65, 66, 67, 67, 68, 65, 63, 64, 66, 67, 67,
+        68, 68, 66, 64, 65, 66, 67, 68, 68, 69,
+        /* Size 16 */
+        56, 55, 53, 57, 61, 61, 62, 62, 62, 63, 64, 64, 65, 65, 66, 66, 55, 55,
+        56, 58, 61, 61, 61, 61, 61, 62, 63, 63, 64, 64, 65, 65, 53, 56, 58, 60,
+        61, 61, 60, 60, 61, 61, 62, 62, 63, 63, 64, 64, 57, 58, 60, 61, 62, 62,
+        61, 61, 62, 62, 62, 63, 63, 64, 64, 64, 61, 61, 61, 62, 63, 63, 63, 63,
+        63, 63, 63, 64, 64, 64, 65, 65, 61, 61, 61, 62, 63, 63, 63, 63, 63, 64,
+        64, 64, 65, 65, 65, 65, 62, 61, 60, 61, 63, 63, 64, 64, 64, 64, 65, 65,
+        65, 65, 66, 66, 62, 61, 60, 61, 63, 63, 64, 64, 65, 65, 65, 65, 66, 66,
+        66, 66, 62, 61, 61, 62, 63, 63, 64, 65, 65, 66, 66, 66, 66, 66, 67, 67,
+        63, 62, 61, 62, 63, 64, 64, 65, 66, 66, 66, 66, 67, 67, 67, 67, 64, 63,
+        62, 62, 63, 64, 65, 65, 66, 66, 67, 67, 67, 67, 67, 67, 64, 63, 62, 63,
+        64, 64, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 65, 64, 63, 63, 64, 65,
+        65, 66, 66, 67, 67, 67, 68, 68, 68, 68, 65, 64, 63, 64, 64, 65, 65, 66,
+        66, 67, 67, 68, 68, 68, 68, 68, 66, 65, 64, 64, 65, 65, 66, 66, 67, 67,
+        67, 68, 68, 68, 68, 68, 66, 65, 64, 64, 65, 65, 66, 66, 67, 67, 67, 68,
+        68, 68, 68, 68,
+        /* Size 32 */
+        56, 55, 55, 54, 53, 55, 57, 59, 61, 61, 61, 61, 62, 62, 62, 62, 62, 63,
+        63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 55, 55, 55, 54,
+        54, 56, 57, 59, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 63, 63, 63,
+        63, 64, 64, 64, 65, 65, 65, 65, 65, 65, 55, 55, 55, 55, 55, 57, 58, 60,
+        61, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 64, 64,
+        64, 64, 65, 65, 65, 65, 54, 54, 55, 56, 57, 58, 59, 60, 61, 61, 61, 61,
+        60, 61, 61, 61, 61, 61, 61, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64,
+        64, 64, 53, 54, 55, 57, 58, 59, 60, 60, 61, 61, 61, 60, 60, 60, 60, 60,
+        60, 61, 61, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64, 64, 64, 64, 55, 56,
+        57, 58, 59, 60, 60, 61, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 62,
+        62, 62, 62, 63, 63, 63, 64, 64, 64, 64, 64, 64, 57, 57, 58, 59, 60, 60,
+        61, 61, 62, 62, 62, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 63, 63,
+        63, 64, 64, 64, 64, 64, 64, 64, 59, 59, 60, 60, 60, 61, 61, 62, 62, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64,
+        64, 64, 64, 64, 61, 61, 61, 61, 61, 62, 62, 62, 63, 63, 63, 63, 62, 62,
+        62, 62, 62, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+        61, 61, 61, 61, 61, 61, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 61, 61, 61, 61,
+        61, 61, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64,
+        64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 61, 61, 61, 61, 60, 61, 61, 62,
+        63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 62, 61, 61, 60, 60, 61, 61, 62, 62, 63, 63, 63,
+        64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66,
+        66, 66, 62, 61, 61, 61, 60, 61, 61, 62, 62, 63, 63, 63, 64, 64, 64, 64,
+        64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 62, 62,
+        61, 61, 60, 61, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+        65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 62, 62, 61, 61, 60, 61,
+        61, 62, 62, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 66, 66,
+        66, 66, 66, 66, 66, 66, 66, 66, 62, 62, 61, 61, 60, 61, 61, 62, 62, 63,
+        63, 64, 64, 64, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+        67, 67, 67, 67, 63, 62, 62, 61, 61, 61, 62, 62, 63, 63, 63, 64, 64, 64,
+        65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67,
+        63, 62, 62, 61, 61, 61, 62, 62, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66,
+        66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 63, 63, 62, 62,
+        61, 62, 62, 62, 63, 63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 66,
+        66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 63, 63, 62, 62, 61, 62, 62, 63,
+        63, 63, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67,
+        67, 67, 67, 67, 67, 67, 64, 63, 63, 62, 62, 62, 62, 63, 63, 64, 64, 64,
+        65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67,
+        67, 67, 64, 63, 63, 63, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 66,
+        66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 64, 64,
+        63, 63, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67,
+        67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 65, 64, 64, 63, 63, 63,
+        63, 64, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67,
+        67, 68, 68, 68, 68, 68, 68, 68, 65, 64, 64, 63, 63, 63, 64, 64, 64, 64,
+        65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68,
+        68, 68, 68, 68, 65, 65, 64, 64, 63, 64, 64, 64, 64, 65, 65, 65, 65, 66,
+        66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68,
+        65, 65, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67,
+        67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 66, 65, 65, 64,
+        64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67,
+        68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 66, 65, 65, 64, 64, 64, 64, 64,
+        65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68,
+        68, 68, 68, 68, 68, 68, 66, 65, 65, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+        66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 66, 65, 65, 64, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66,
+        67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 65, 68, 70, 65, 67, 69, 70, 68, 69, 70, 71, 70, 70, 71, 71,
+        /* Size 8 */
+        64, 62, 62, 64, 66, 67, 68, 69, 62, 63, 63, 64, 65, 67, 68, 68, 62, 63,
+        65, 66, 67, 67, 68, 69, 64, 64, 66, 67, 68, 68, 69, 69, 66, 65, 67, 68,
+        68, 69, 69, 69, 67, 67, 67, 68, 69, 69, 69, 69, 68, 68, 68, 69, 69, 69,
+        69, 69, 69, 68, 69, 69, 69, 69, 69, 70,
+        /* Size 16 */
+        64, 63, 62, 62, 62, 63, 64, 65, 66, 67, 67, 68, 68, 68, 69, 69, 63, 63,
+        62, 62, 63, 63, 64, 65, 66, 66, 67, 67, 68, 68, 69, 69, 62, 62, 63, 63,
+        63, 63, 64, 65, 65, 66, 67, 67, 68, 68, 68, 68, 62, 62, 63, 63, 64, 64,
+        65, 65, 66, 67, 67, 67, 68, 68, 68, 68, 62, 63, 63, 64, 65, 65, 66, 66,
+        67, 67, 67, 68, 68, 68, 69, 69, 63, 63, 63, 64, 65, 66, 66, 67, 67, 67,
+        68, 68, 68, 68, 69, 69, 64, 64, 64, 65, 66, 66, 67, 67, 68, 68, 68, 68,
+        69, 69, 69, 69, 65, 65, 65, 65, 66, 67, 67, 68, 68, 68, 68, 69, 69, 69,
+        69, 69, 66, 66, 65, 66, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69,
+        67, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 67, 67,
+        67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 68, 67, 67, 67,
+        68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 68, 68, 68, 68,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 68, 68, 68, 68, 69, 69,
+        69, 69, 69, 69, 69, 69, 70, 70, 69, 69, 68, 68, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 70, 70, 70, 69, 69, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 70, 70, 70,
+        /* Size 32 */
+        64, 63, 63, 62, 62, 62, 62, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 66,
+        67, 67, 67, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 63, 63, 63, 62,
+        62, 62, 62, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 66, 67, 67, 67, 67,
+        68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 63, 63, 63, 62, 62, 62, 62, 62,
+        63, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68,
+        68, 68, 69, 69, 69, 69, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 64,
+        64, 64, 65, 65, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 65, 65,
+        65, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 62, 62,
+        62, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 67,
+        67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 62, 62, 62, 63, 63, 63,
+        63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 67, 67, 67, 67, 67, 68,
+        68, 68, 68, 68, 68, 68, 68, 68, 62, 62, 62, 63, 63, 63, 63, 64, 64, 65,
+        65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68,
+        68, 68, 68, 68, 62, 62, 63, 63, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66,
+        66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+        63, 63, 63, 63, 63, 63, 64, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67,
+        67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 63, 63, 63, 63,
+        63, 64, 64, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68,
+        68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 64, 64, 64, 64, 63, 64, 65, 65,
+        66, 66, 66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+        69, 69, 69, 69, 69, 69, 64, 64, 64, 64, 64, 64, 65, 65, 66, 66, 66, 67,
+        67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69,
+        69, 69, 65, 65, 65, 64, 64, 65, 65, 66, 66, 66, 67, 67, 67, 67, 67, 68,
+        68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 65, 65,
+        65, 65, 65, 65, 65, 66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68,
+        68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 66, 66, 65, 65, 65, 65,
+        66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 66, 66, 66, 66, 65, 66, 66, 66, 67, 67,
+        67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 69, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68,
+        68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        67, 67, 66, 66, 66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 67, 67, 67, 67,
+        66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 67, 67, 67, 67, 67, 67, 67, 67,
+        67, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 68, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68,
+        68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 68, 68, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68,
+        68, 68, 67, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 68, 68, 68, 68,
+        68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        70, 70, 70, 70, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70,
+        69, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 69, 69, 69, 68,
+        68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 69, 69, 69, 68, 68, 68, 68, 68,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70,
+        70, 70, 70, 70, 70, 70, 69, 69, 69, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70,
+        70, 70, 69, 69, 69, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70 },
+      { /* Intra matrices */
+        /* Size 4 */
+        59, 60, 63, 65, 60, 63, 64, 65, 63, 64, 66, 66, 65, 65, 66, 66,
+        /* Size 8 */
+        61, 59, 59, 61, 63, 64, 65, 66, 59, 60, 59, 61, 62, 64, 65, 65, 59, 59,
+        62, 63, 64, 64, 65, 65, 61, 61, 63, 64, 65, 65, 65, 66, 63, 62, 64, 65,
+        65, 66, 66, 66, 64, 64, 64, 65, 66, 66, 66, 66, 65, 65, 65, 65, 66, 66,
+        66, 66, 66, 65, 65, 66, 66, 66, 66, 67,
+        /* Size 16 */
+        61, 60, 58, 59, 59, 60, 61, 62, 63, 63, 64, 64, 65, 65, 65, 65, 60, 59,
+        59, 59, 59, 60, 61, 62, 62, 63, 64, 64, 65, 65, 65, 65, 58, 59, 60, 59,
+        59, 60, 60, 61, 62, 63, 63, 64, 64, 65, 65, 65, 59, 59, 59, 60, 60, 61,
+        61, 62, 63, 63, 64, 64, 65, 65, 65, 65, 59, 59, 59, 60, 62, 62, 63, 63,
+        63, 64, 64, 64, 65, 65, 65, 65, 60, 60, 60, 61, 62, 63, 63, 63, 64, 64,
+        64, 65, 65, 65, 65, 65, 61, 61, 60, 61, 63, 63, 64, 64, 64, 65, 65, 65,
+        65, 65, 66, 66, 62, 62, 61, 62, 63, 63, 64, 64, 65, 65, 65, 65, 65, 66,
+        66, 66, 63, 62, 62, 63, 63, 64, 64, 65, 65, 65, 65, 65, 66, 66, 66, 66,
+        63, 63, 63, 63, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 64, 64,
+        63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 64, 64, 64, 64,
+        64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 65, 65, 64, 65, 65, 65,
+        65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 66,
+        66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66,
+        66, 66, 66, 66,
+        /* Size 32 */
+        61, 60, 59, 59, 58, 59, 59, 59, 59, 60, 60, 61, 61, 61, 62, 62, 63, 63,
+        63, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 60, 60, 59, 59,
+        59, 59, 59, 59, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63, 63, 63, 64, 64,
+        64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 60, 60, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64, 64, 64, 65, 65,
+        65, 65, 65, 65, 65, 65, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60,
+        61, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, 65,
+        65, 65, 58, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 61, 61, 62,
+        62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 59, 59,
+        59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 62, 62, 62, 63, 63, 63,
+        63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 59, 59, 59, 59, 59, 60,
+        60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64,
+        64, 65, 65, 65, 65, 65, 65, 65, 59, 59, 59, 59, 59, 60, 60, 60, 61, 61,
+        61, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+        65, 65, 65, 65, 59, 59, 59, 59, 59, 60, 60, 61, 62, 62, 62, 62, 62, 63,
+        63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65,
+        60, 59, 59, 59, 59, 60, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, 63, 64,
+        64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 60, 60, 60, 60,
+        60, 60, 61, 61, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64,
+        65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 61, 60, 60, 60, 60, 61, 61, 62,
+        62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 61, 61, 61, 61, 60, 61, 61, 62, 62, 63, 63, 63,
+        64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+        65, 65, 61, 61, 61, 61, 61, 61, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64,
+        64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 62, 62,
+        62, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 62, 62, 62, 62, 62, 62,
+        62, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+        65, 65, 66, 66, 66, 66, 66, 66, 63, 63, 62, 62, 62, 62, 63, 63, 63, 63,
+        64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66,
+        66, 66, 66, 66, 63, 63, 63, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 65,
+        65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+        63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65,
+        65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 64, 63, 63, 63,
+        63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66,
+        66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 64, 64, 64, 63, 63, 63, 64, 64,
+        64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 64, 64, 64, 64, 63, 64, 64, 64, 64, 64, 64, 65,
+        65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+        66, 66, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65,
+        65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 64,
+        64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 64, 64, 64,
+        64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 65, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+        66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+        65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+        65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65,
+        65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+        65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+        66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 67, 68, 69, 67, 68, 69, 69, 68, 69, 70, 70, 69, 69, 70, 71,
+        /* Size 8 */
+        64, 62, 67, 67, 67, 68, 68, 69, 62, 65, 67, 66, 66, 67, 68, 68, 67, 67,
+        68, 67, 67, 68, 68, 69, 67, 66, 67, 68, 68, 68, 69, 69, 67, 66, 67, 68,
+        69, 69, 69, 69, 68, 67, 68, 68, 69, 69, 70, 70, 68, 68, 68, 69, 69, 70,
+        70, 70, 69, 68, 69, 69, 69, 70, 70, 70,
+        /* Size 16 */
+        64, 63, 62, 64, 67, 67, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 63, 63,
+        64, 65, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 69, 69, 62, 64, 65, 66,
+        67, 66, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 64, 65, 66, 67, 67, 67,
+        67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 67, 67, 67, 67, 68, 67, 67, 67,
+        67, 68, 68, 68, 68, 68, 69, 69, 67, 67, 66, 67, 67, 68, 68, 68, 68, 68,
+        68, 68, 68, 69, 69, 69, 67, 67, 66, 67, 67, 68, 68, 68, 68, 68, 68, 69,
+        69, 69, 69, 69, 67, 67, 66, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69,
+        69, 69, 67, 67, 66, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+        68, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 70, 70, 68, 67,
+        67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 68, 68, 67, 68,
+        68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 68, 68, 68, 68, 68, 68,
+        69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 69, 68, 68, 68, 68, 69, 69, 69,
+        69, 69, 70, 70, 70, 70, 70, 70, 69, 69, 68, 68, 69, 69, 69, 69, 69, 70,
+        70, 70, 70, 70, 70, 70, 69, 69, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70,
+        70, 70, 70, 70,
+        /* Size 32 */
+        64, 64, 63, 63, 62, 63, 64, 65, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67,
+        68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 64, 63, 63, 63,
+        63, 64, 65, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68,
+        68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 63, 63, 63, 63, 64, 64, 65, 66,
+        67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68,
+        68, 68, 69, 69, 69, 69, 63, 63, 63, 64, 64, 65, 66, 66, 67, 67, 67, 66,
+        66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 62, 63, 64, 64, 65, 66, 66, 66, 67, 67, 66, 66, 66, 66, 66, 66,
+        66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 63, 64,
+        64, 65, 66, 66, 66, 67, 67, 67, 67, 67, 66, 66, 67, 67, 67, 67, 67, 67,
+        67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 64, 65, 65, 66, 66, 66,
+        67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68,
+        68, 68, 68, 68, 68, 68, 68, 68, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67,
+        67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 68, 68, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 67, 67, 67, 67,
+        67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+        67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 67, 67, 67, 67,
+        66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 67, 67, 67, 66, 66, 67, 67, 67,
+        67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69,
+        69, 69, 69, 69, 69, 69, 67, 67, 67, 66, 66, 66, 67, 67, 67, 68, 68, 68,
+        68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 67, 67, 67, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+        68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 67, 67,
+        67, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 67, 67, 67, 67, 66, 67,
+        67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 67, 67, 67, 67, 66, 67, 67, 67, 67, 68,
+        68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 69, 67, 67, 67, 67, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        68, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 68, 67, 67, 67,
+        67, 67, 67, 67, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 68, 68, 67, 67, 67, 67, 67, 67,
+        68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70,
+        70, 70, 70, 70, 70, 70, 68, 68, 68, 67, 67, 67, 67, 68, 68, 68, 68, 68,
+        68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70,
+        70, 70, 68, 68, 68, 67, 67, 67, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68, 68,
+        68, 68, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+        70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68, 68, 68, 68, 68, 68,
+        68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70,
+        70, 70, 70, 70, 70, 70, 70, 70, 69, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+        68, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70,
+        70, 70, 70, 70, 69, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
+        69, 69, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+        70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 69, 69, 69, 68,
+        68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70,
+        70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 69, 69, 69, 68, 68, 68, 68, 68,
+        69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70,
+        70, 70, 70, 70, 70, 70, 69, 69, 69, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+        69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
+        70, 70, 69, 69, 69, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+        69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70 },
+      { /* Intra matrices */
+        /* Size 4 */
+        59, 63, 63, 64, 63, 64, 64, 65, 63, 64, 65, 66, 64, 65, 66, 66,
+        /* Size 8 */
+        60, 58, 63, 63, 63, 64, 65, 65, 58, 61, 63, 62, 62, 63, 64, 64, 63, 63,
+        64, 64, 64, 64, 64, 65, 63, 62, 64, 64, 64, 65, 65, 65, 63, 62, 64, 64,
+        65, 65, 65, 66, 64, 63, 64, 65, 65, 65, 66, 66, 65, 64, 64, 65, 65, 66,
+        66, 66, 65, 64, 65, 65, 66, 66, 66, 66,
+        /* Size 16 */
+        60, 59, 58, 60, 63, 63, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 59, 59,
+        60, 61, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 58, 60, 61, 62,
+        63, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 60, 61, 62, 62, 63, 63,
+        63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 63, 63, 63, 63, 64, 63, 63, 63,
+        63, 63, 64, 64, 64, 64, 64, 64, 63, 63, 62, 63, 63, 64, 64, 64, 64, 64,
+        64, 64, 64, 65, 65, 65, 63, 63, 62, 63, 63, 64, 64, 64, 64, 64, 64, 65,
+        65, 65, 65, 65, 63, 63, 62, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, 65,
+        65, 65, 63, 63, 62, 63, 63, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65,
+        64, 63, 63, 63, 63, 64, 64, 65, 65, 65, 65, 65, 65, 65, 66, 66, 64, 63,
+        63, 63, 64, 64, 64, 65, 65, 65, 65, 65, 66, 66, 66, 66, 64, 64, 63, 63,
+        64, 64, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 64, 64, 63, 64, 64, 64,
+        65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 65, 64, 64, 64, 64, 65, 65, 65,
+        65, 65, 66, 66, 66, 66, 66, 66, 65, 64, 64, 64, 64, 65, 65, 65, 65, 66,
+        66, 66, 66, 66, 66, 66, 65, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66,
+        66, 66, 66, 66,
+        /* Size 32 */
+        60, 59, 59, 59, 58, 59, 60, 61, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 59, 59, 59, 59,
+        59, 60, 61, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64,
+        64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 59, 59, 59, 59, 60, 60, 61, 62,
+        63, 63, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 59, 59, 59, 60, 60, 61, 61, 62, 63, 62, 62, 62,
+        62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64,
+        64, 64, 58, 59, 60, 60, 61, 61, 62, 62, 63, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 59, 60,
+        60, 61, 61, 62, 62, 62, 63, 63, 63, 62, 62, 62, 62, 62, 62, 63, 63, 63,
+        63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 60, 61, 61, 61, 62, 62,
+        62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 61, 62, 62, 62, 62, 62, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        63, 63, 63, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 62, 62,
+        62, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 63, 63, 62, 62, 62, 62, 63, 63,
+        63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        65, 65, 65, 65, 65, 65, 63, 63, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65,
+        65, 65, 63, 63, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 63, 63,
+        63, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65,
+        65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 63, 63, 63, 62, 62, 62,
+        63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 65, 65, 63, 63, 63, 62, 62, 62, 63, 63, 63, 63,
+        64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+        65, 65, 65, 65, 63, 63, 63, 63, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64,
+        64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+        63, 63, 63, 63, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64, 63, 63, 63,
+        63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 64, 64, 63, 63, 63, 63, 63, 63,
+        64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+        66, 66, 66, 66, 66, 66, 64, 64, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64,
+        64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66,
+        66, 66, 64, 64, 64, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 64, 64,
+        64, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65,
+        65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 64, 64, 64, 64, 63, 64,
+        64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 66, 66, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66,
+        66, 66, 66, 66, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+        65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65,
+        65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 64, 64,
+        64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 64, 64, 64, 64, 64, 64,
+        64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65,
+        65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+        66, 66, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65,
+        65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 8 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 16 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64,
+        /* Size 32 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+      { /* Intra matrices */
+        /* Size 4 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 8 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 16 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64,
+        /* Size 32 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 8 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 16 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64,
+        /* Size 32 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+      { /* Intra matrices */
+        /* Size 4 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 8 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 16 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64,
+        /* Size 32 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 } } }
+};
+
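+/* Reference weight matrices, indexed as
+ * wt_matrix_ref[qm_level][0 = luma, 1 = chroma][0 = inter, 1 = intra].
+ * Each innermost array stores the 4x4, 8x8, 16x16 and 32x32 matrices
+ * concatenated, hence the 4 * 4 + 8 * 8 + 16 * 16 + 32 * 32 element
+ * count. */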
+static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][2][4 * 4 + 8 * 8 + 16 * 16 +
+                                                   32 * 32] = {
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 58, 33, 19, 58, 37, 25, 17, 33, 25, 16, 12, 19, 17, 12, 10,
+        /* Size 8 */
+        64, 87, 80, 59, 42, 31, 24, 19, 87, 75, 79, 66, 50, 38, 29, 23, 80, 79,
+        54, 46, 38, 31, 25, 20, 59, 66, 46, 34, 29, 24, 21, 18, 42, 50, 38, 29,
+        23, 20, 17, 15, 31, 38, 31, 24, 20, 17, 15, 13, 24, 29, 25, 21, 17, 15,
+        13, 12, 19, 23, 20, 18, 15, 13, 12, 11,
+        /* Size 16 */
+        64, 76, 87, 84, 80, 70, 59, 51, 42, 37, 31, 27, 24, 21, 19, 19, 76, 79,
+        81, 81, 80, 71, 63, 55, 46, 40, 34, 30, 26, 23, 21, 21, 87, 81, 75, 77,
+        79, 73, 66, 58, 50, 44, 38, 33, 29, 26, 23, 23, 84, 81, 77, 72, 67, 61,
+        56, 50, 44, 39, 34, 31, 27, 24, 21, 21, 80, 80, 79, 67, 54, 50, 46, 42,
+        38, 34, 31, 28, 25, 23, 20, 20, 70, 71, 73, 61, 50, 45, 40, 37, 33, 30,
+        28, 25, 23, 21, 19, 19, 59, 63, 66, 56, 46, 40, 34, 31, 29, 26, 24, 22,
+        21, 19, 18, 18, 51, 55, 58, 50, 42, 37, 31, 29, 26, 24, 22, 20, 19, 18,
+        16, 16, 42, 46, 50, 44, 38, 33, 29, 26, 23, 21, 20, 18, 17, 16, 15, 15,
+        37, 40, 44, 39, 34, 30, 26, 24, 21, 20, 18, 17, 16, 15, 14, 14, 31, 34,
+        38, 34, 31, 28, 24, 22, 20, 18, 17, 16, 15, 14, 13, 13, 27, 30, 33, 31,
+        28, 25, 22, 20, 18, 17, 16, 15, 14, 13, 13, 13, 24, 26, 29, 27, 25, 23,
+        21, 19, 17, 16, 15, 14, 13, 13, 12, 12, 21, 23, 26, 24, 23, 21, 19, 18,
+        16, 15, 14, 13, 13, 12, 12, 12, 19, 21, 23, 21, 20, 19, 18, 16, 15, 14,
+        13, 13, 12, 12, 11, 11, 19, 21, 23, 21, 20, 19, 18, 16, 15, 14, 13, 13,
+        12, 12, 11, 11,
+        /* Size 32 */
+        64, 70, 76, 82, 87, 86, 84, 82, 80, 75, 70, 65, 59, 55, 51, 47, 42, 40,
+        37, 34, 31, 29, 27, 26, 24, 22, 21, 20, 19, 19, 19, 19, 70, 74, 77, 81,
+        84, 83, 82, 81, 80, 75, 71, 66, 61, 57, 53, 49, 44, 41, 39, 36, 33, 31,
+        29, 27, 25, 24, 22, 21, 20, 20, 20, 20, 76, 77, 79, 80, 81, 81, 81, 80,
+        80, 75, 71, 67, 63, 59, 55, 51, 46, 43, 40, 37, 34, 32, 30, 28, 26, 25,
+        23, 22, 21, 21, 21, 21, 82, 81, 80, 79, 78, 79, 79, 79, 79, 76, 72, 68,
+        65, 61, 56, 52, 48, 45, 42, 39, 36, 34, 32, 30, 27, 26, 25, 23, 22, 22,
+        22, 22, 87, 84, 81, 78, 75, 76, 77, 78, 79, 76, 73, 70, 66, 62, 58, 54,
+        50, 47, 44, 41, 38, 36, 33, 31, 29, 27, 26, 24, 23, 23, 23, 23, 86, 83,
+        81, 79, 76, 75, 75, 74, 73, 70, 67, 64, 61, 58, 54, 51, 47, 44, 42, 39,
+        36, 34, 32, 30, 28, 26, 25, 23, 22, 22, 22, 22, 84, 82, 81, 79, 77, 75,
+        72, 69, 67, 64, 61, 59, 56, 53, 50, 47, 44, 42, 39, 37, 34, 32, 31, 29,
+        27, 25, 24, 23, 21, 21, 21, 21, 82, 81, 80, 79, 78, 74, 69, 65, 61, 58,
+        56, 53, 51, 48, 46, 44, 41, 39, 37, 35, 33, 31, 29, 28, 26, 25, 23, 22,
+        21, 21, 21, 21, 80, 80, 80, 79, 79, 73, 67, 61, 54, 52, 50, 48, 46, 44,
+        42, 40, 38, 36, 34, 33, 31, 29, 28, 26, 25, 24, 23, 22, 20, 20, 20, 20,
+        75, 75, 75, 76, 76, 70, 64, 58, 52, 50, 47, 45, 43, 41, 39, 37, 36, 34,
+        32, 31, 29, 28, 27, 25, 24, 23, 22, 21, 20, 20, 20, 20, 70, 71, 71, 72,
+        73, 67, 61, 56, 50, 47, 45, 42, 40, 38, 37, 35, 33, 32, 30, 29, 28, 26,
+        25, 24, 23, 22, 21, 20, 19, 19, 19, 19, 65, 66, 67, 68, 70, 64, 59, 53,
+        48, 45, 42, 40, 37, 36, 34, 32, 31, 30, 28, 27, 26, 25, 24, 23, 22, 21,
+        20, 19, 18, 18, 18, 18, 59, 61, 63, 65, 66, 61, 56, 51, 46, 43, 40, 37,
+        34, 33, 31, 30, 29, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 18, 18, 18,
+        18, 18, 55, 57, 59, 61, 62, 58, 53, 48, 44, 41, 38, 36, 33, 31, 30, 29,
+        27, 26, 25, 24, 23, 22, 21, 21, 20, 19, 18, 18, 17, 17, 17, 17, 51, 53,
+        55, 56, 58, 54, 50, 46, 42, 39, 37, 34, 31, 30, 29, 27, 26, 25, 24, 23,
+        22, 21, 20, 20, 19, 18, 18, 17, 16, 16, 16, 16, 47, 49, 51, 52, 54, 51,
+        47, 44, 40, 37, 35, 32, 30, 29, 27, 26, 24, 24, 23, 22, 21, 20, 19, 19,
+        18, 18, 17, 16, 16, 16, 16, 16, 42, 44, 46, 48, 50, 47, 44, 41, 38, 36,
+        33, 31, 29, 27, 26, 24, 23, 22, 21, 21, 20, 19, 18, 18, 17, 17, 16, 16,
+        15, 15, 15, 15, 40, 41, 43, 45, 47, 44, 42, 39, 36, 34, 32, 30, 27, 26,
+        25, 24, 22, 21, 21, 20, 19, 18, 18, 17, 17, 16, 16, 15, 15, 15, 15, 15,
+        37, 39, 40, 42, 44, 42, 39, 37, 34, 32, 30, 28, 26, 25, 24, 23, 21, 21,
+        20, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 34, 36, 37, 39,
+        41, 39, 37, 35, 33, 31, 29, 27, 25, 24, 23, 22, 21, 20, 19, 18, 18, 17,
+        16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 31, 33, 34, 36, 38, 36, 34, 33,
+        31, 29, 28, 26, 24, 23, 22, 21, 20, 19, 18, 18, 17, 16, 16, 15, 15, 15,
+        14, 14, 13, 13, 13, 13, 29, 31, 32, 34, 36, 34, 32, 31, 29, 28, 26, 25,
+        23, 22, 21, 20, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 13,
+        13, 13, 27, 29, 30, 32, 33, 32, 31, 29, 28, 27, 25, 24, 22, 21, 20, 19,
+        18, 18, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 26, 27,
+        28, 30, 31, 30, 29, 28, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 17, 16,
+        15, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 24, 25, 26, 27, 29, 28,
+        27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 17, 16, 15, 15, 14, 14, 14,
+        13, 13, 13, 12, 12, 12, 12, 12, 22, 24, 25, 26, 27, 26, 25, 25, 24, 23,
+        22, 21, 20, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12,
+        12, 12, 12, 12, 21, 22, 23, 25, 26, 25, 24, 23, 23, 22, 21, 20, 19, 18,
+        18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12,
+        20, 21, 22, 23, 24, 23, 23, 22, 22, 21, 20, 19, 18, 18, 17, 16, 16, 15,
+        15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 19, 20, 21, 22,
+        23, 22, 21, 21, 20, 20, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13,
+        13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 19, 20, 21, 22, 23, 22, 21, 21,
+        20, 20, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12,
+        12, 11, 11, 11, 11, 11, 19, 20, 21, 22, 23, 22, 21, 21, 20, 20, 19, 18,
+        18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11,
+        11, 11, 19, 20, 21, 22, 23, 22, 21, 21, 20, 20, 19, 18, 18, 17, 16, 16,
+        15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11 },
+      { /* Intra matrices */
+        /* Size 4 */
+        249, 225, 124, 69, 225, 139, 91, 60, 124, 91, 57, 42, 69, 60, 42, 32,
+        /* Size 8 */
+        206, 285, 261, 191, 134, 96, 71, 55, 285, 245, 257, 214, 161, 118, 88,
+        68, 261, 257, 174, 145, 119, 95, 75, 60, 191, 214, 145, 107, 87, 73, 61,
+        51, 134, 161, 119, 87, 69, 58, 50, 43, 96, 118, 95, 73, 58, 48, 42, 37,
+        71, 88, 75, 61, 50, 42, 36, 32, 55, 68, 60, 51, 43, 37, 32, 29,
+        /* Size 16 */
+        217, 259, 300, 287, 275, 237, 200, 171, 141, 121, 101, 88, 75, 66, 57,
+        57, 259, 269, 279, 275, 272, 243, 213, 184, 155, 134, 113, 98, 84, 74,
+        64, 64, 300, 279, 257, 264, 270, 248, 226, 197, 169, 147, 124, 109, 93,
+        82, 71, 71, 287, 275, 264, 245, 227, 208, 189, 168, 147, 130, 112, 99,
+        86, 77, 67, 67, 275, 272, 270, 227, 183, 168, 152, 139, 125, 113, 100,
+        90, 79, 71, 63, 63, 237, 243, 248, 208, 168, 150, 132, 120, 109, 98, 88,
+        80, 72, 65, 58, 58, 200, 213, 226, 189, 152, 132, 113, 102, 92, 84, 77,
+        70, 64, 59, 54, 54, 171, 184, 197, 168, 139, 120, 102, 92, 82, 75, 69,
+        63, 58, 54, 49, 49, 141, 155, 169, 147, 125, 109, 92, 82, 73, 67, 61,
+        56, 52, 49, 45, 45, 121, 134, 147, 130, 113, 98, 84, 75, 67, 61, 56, 52,
+        48, 45, 42, 42, 101, 113, 124, 112, 100, 88, 77, 69, 61, 56, 50, 47, 44,
+        41, 39, 39, 88, 98, 109, 99, 90, 80, 70, 63, 56, 52, 47, 44, 41, 39, 36,
+        36, 75, 84, 93, 86, 79, 72, 64, 58, 52, 48, 44, 41, 38, 36, 34, 34, 66,
+        74, 82, 77, 71, 65, 59, 54, 49, 45, 41, 39, 36, 34, 32, 32, 57, 64, 71,
+        67, 63, 58, 54, 49, 45, 42, 39, 36, 34, 32, 31, 31, 57, 64, 71, 67, 63,
+        58, 54, 49, 45, 42, 39, 36, 34, 32, 31, 31,
+        /* Size 32 */
+        223, 244, 265, 287, 308, 301, 295, 288, 282, 263, 244, 225, 206, 190,
+        175, 160, 145, 134, 124, 114, 104, 97, 90, 83, 77, 72, 68, 63, 59, 59,
+        59, 59, 244, 257, 271, 284, 297, 293, 289, 285, 281, 264, 246, 229, 212,
+        197, 182, 167, 152, 141, 131, 120, 110, 103, 95, 88, 81, 77, 72, 67, 62,
+        62, 62, 62, 265, 271, 276, 281, 286, 284, 283, 281, 280, 264, 249, 234,
+        219, 204, 189, 174, 159, 148, 137, 127, 116, 108, 101, 93, 86, 81, 76,
+        71, 66, 66, 66, 66, 287, 284, 281, 278, 275, 276, 277, 278, 278, 265,
+        252, 238, 225, 210, 196, 181, 166, 155, 144, 133, 122, 114, 106, 98, 91,
+        85, 80, 75, 69, 69, 69, 69, 308, 297, 286, 275, 264, 267, 271, 274, 277,
+        266, 254, 243, 231, 217, 203, 188, 174, 162, 151, 139, 128, 120, 111,
+        103, 95, 90, 84, 78, 73, 73, 73, 73, 301, 293, 284, 276, 267, 264, 261,
+        258, 255, 244, 234, 223, 213, 200, 187, 175, 162, 152, 142, 132, 121,
+        114, 107, 99, 92, 87, 81, 76, 71, 71, 71, 71, 295, 289, 283, 277, 271,
+        261, 252, 242, 233, 223, 213, 203, 194, 183, 172, 162, 151, 142, 133,
+        124, 115, 108, 102, 95, 88, 83, 79, 74, 69, 69, 69, 69, 288, 285, 281,
+        278, 274, 258, 242, 226, 210, 201, 193, 184, 175, 166, 157, 149, 140,
+        132, 124, 117, 109, 103, 97, 91, 85, 80, 76, 71, 67, 67, 67, 67, 282,
+        281, 280, 278, 277, 255, 233, 210, 188, 180, 172, 164, 156, 149, 142,
+        135, 129, 122, 116, 109, 103, 97, 92, 87, 81, 77, 73, 69, 65, 65, 65,
+        65, 263, 264, 264, 265, 266, 244, 223, 201, 180, 171, 163, 154, 146,
+        139, 133, 126, 120, 114, 108, 103, 97, 92, 87, 82, 77, 74, 70, 66, 62,
+        62, 62, 62, 244, 246, 249, 252, 254, 234, 213, 193, 172, 163, 154, 145,
+        136, 130, 124, 118, 111, 106, 101, 96, 91, 86, 82, 78, 73, 70, 67, 63,
+        60, 60, 60, 60, 225, 229, 234, 238, 243, 223, 203, 184, 164, 154, 145,
+        135, 126, 120, 114, 109, 103, 98, 94, 89, 85, 81, 77, 73, 70, 67, 64,
+        61, 57, 57, 57, 57, 206, 212, 219, 225, 231, 213, 194, 175, 156, 146,
+        136, 126, 116, 110, 105, 100, 94, 90, 87, 83, 79, 76, 72, 69, 66, 63,
+        60, 58, 55, 55, 55, 55, 190, 197, 204, 210, 217, 200, 183, 166, 149,
+        139, 130, 120, 110, 105, 100, 95, 89, 86, 82, 78, 75, 72, 69, 66, 63,
+        60, 58, 55, 53, 53, 53, 53, 175, 182, 189, 196, 203, 187, 172, 157, 142,
+        133, 124, 114, 105, 100, 95, 90, 84, 81, 77, 74, 71, 68, 65, 62, 60, 57,
+        55, 53, 51, 51, 51, 51, 160, 167, 174, 181, 188, 175, 162, 149, 135,
+        126, 118, 109, 100, 95, 90, 84, 79, 76, 73, 70, 66, 64, 61, 59, 57, 55,
+        53, 51, 49, 49, 49, 49, 145, 152, 159, 166, 174, 162, 151, 140, 129,
+        120, 111, 103, 94, 89, 84, 79, 74, 71, 68, 65, 62, 60, 58, 56, 53, 52,
+        50, 48, 46, 46, 46, 46, 134, 141, 148, 155, 162, 152, 142, 132, 122,
+        114, 106, 98, 90, 86, 81, 76, 71, 68, 66, 63, 60, 58, 55, 53, 51, 50,
+        48, 46, 45, 45, 45, 45, 124, 131, 137, 144, 151, 142, 133, 124, 116,
+        108, 101, 94, 87, 82, 77, 73, 68, 66, 63, 60, 57, 55, 53, 51, 49, 48,
+        46, 45, 43, 43, 43, 43, 114, 120, 127, 133, 139, 132, 124, 117, 109,
+        103, 96, 89, 83, 78, 74, 70, 65, 63, 60, 57, 54, 53, 51, 49, 47, 46, 44,
+        43, 41, 41, 41, 41, 104, 110, 116, 122, 128, 121, 115, 109, 103, 97, 91,
+        85, 79, 75, 71, 66, 62, 60, 57, 54, 52, 50, 48, 47, 45, 44, 42, 41, 40,
+        40, 40, 40, 97, 103, 108, 114, 120, 114, 108, 103, 97, 92, 86, 81, 76,
+        72, 68, 64, 60, 58, 55, 53, 50, 48, 47, 45, 43, 42, 41, 40, 38, 38, 38,
+        38, 90, 95, 101, 106, 111, 107, 102, 97, 92, 87, 82, 77, 72, 69, 65, 61,
+        58, 55, 53, 51, 48, 47, 45, 44, 42, 41, 40, 38, 37, 37, 37, 37, 83, 88,
+        93, 98, 103, 99, 95, 91, 87, 82, 78, 73, 69, 66, 62, 59, 56, 53, 51, 49,
+        47, 45, 44, 42, 40, 39, 38, 37, 36, 36, 36, 36, 77, 81, 86, 91, 95, 92,
+        88, 85, 81, 77, 73, 70, 66, 63, 60, 57, 53, 51, 49, 47, 45, 43, 42, 40,
+        39, 38, 37, 36, 35, 35, 35, 35, 72, 77, 81, 85, 90, 87, 83, 80, 77, 74,
+        70, 67, 63, 60, 57, 55, 52, 50, 48, 46, 44, 42, 41, 39, 38, 37, 36, 35,
+        34, 34, 34, 34, 68, 72, 76, 80, 84, 81, 79, 76, 73, 70, 67, 64, 60, 58,
+        55, 53, 50, 48, 46, 44, 42, 41, 40, 38, 37, 36, 35, 34, 33, 33, 33, 33,
+        63, 67, 71, 75, 78, 76, 74, 71, 69, 66, 63, 61, 58, 55, 53, 51, 48, 46,
+        45, 43, 41, 40, 38, 37, 36, 35, 34, 33, 32, 32, 32, 32, 59, 62, 66, 69,
+        73, 71, 69, 67, 65, 62, 60, 57, 55, 53, 51, 49, 46, 45, 43, 41, 40, 38,
+        37, 36, 35, 34, 33, 32, 31, 31, 31, 31, 59, 62, 66, 69, 73, 71, 69, 67,
+        65, 62, 60, 57, 55, 53, 51, 49, 46, 45, 43, 41, 40, 38, 37, 36, 35, 34,
+        33, 32, 31, 31, 31, 31, 59, 62, 66, 69, 73, 71, 69, 67, 65, 62, 60, 57,
+        55, 53, 51, 49, 46, 45, 43, 41, 40, 38, 37, 36, 35, 34, 33, 32, 31, 31,
+        31, 31, 59, 62, 66, 69, 73, 71, 69, 67, 65, 62, 60, 57, 55, 53, 51, 49,
+        46, 45, 43, 41, 40, 38, 37, 36, 35, 34, 33, 32, 31, 31, 31, 31 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 39, 35, 27, 39, 31, 29, 25, 35, 29, 21, 18, 27, 25, 18, 15,
+        /* Size 8 */
+        64, 81, 42, 39, 36, 32, 28, 24, 81, 54, 41, 46, 44, 40, 35, 30, 42, 41,
+        34, 36, 36, 34, 31, 27, 39, 46, 36, 31, 29, 28, 26, 24, 36, 44, 36, 29,
+        26, 24, 22, 20, 32, 40, 34, 28, 24, 21, 19, 18, 28, 35, 31, 26, 22, 19,
+        17, 16, 24, 30, 27, 24, 20, 18, 16, 15,
+        /* Size 16 */
+        64, 72, 81, 61, 42, 41, 39, 38, 36, 34, 32, 30, 28, 26, 24, 24, 72, 70,
+        67, 54, 42, 42, 43, 42, 40, 38, 36, 34, 31, 29, 27, 27, 81, 67, 54, 48,
+        41, 44, 46, 45, 44, 42, 40, 37, 35, 32, 30, 30, 61, 54, 48, 43, 38, 39,
+        41, 40, 40, 39, 37, 35, 33, 31, 29, 29, 42, 42, 41, 38, 34, 35, 36, 36,
+        36, 35, 34, 32, 31, 29, 27, 27, 41, 42, 44, 39, 35, 34, 33, 33, 33, 32,
+        31, 30, 28, 27, 25, 25, 39, 43, 46, 41, 36, 33, 31, 30, 29, 29, 28, 27,
+        26, 25, 24, 24, 38, 42, 45, 40, 36, 33, 30, 29, 27, 27, 26, 25, 24, 23,
+        22, 22, 36, 40, 44, 40, 36, 33, 29, 27, 26, 25, 24, 23, 22, 21, 20, 20,
+        34, 38, 42, 39, 35, 32, 29, 27, 25, 23, 22, 21, 21, 20, 19, 19, 32, 36,
+        40, 37, 34, 31, 28, 26, 24, 22, 21, 20, 19, 19, 18, 18, 30, 34, 37, 35,
+        32, 30, 27, 25, 23, 21, 20, 19, 18, 18, 17, 17, 28, 31, 35, 33, 31, 28,
+        26, 24, 22, 21, 19, 18, 17, 17, 16, 16, 26, 29, 32, 31, 29, 27, 25, 23,
+        21, 20, 19, 18, 17, 16, 15, 15, 24, 27, 30, 29, 27, 25, 24, 22, 20, 19,
+        18, 17, 16, 15, 15, 15, 24, 27, 30, 29, 27, 25, 24, 22, 20, 19, 18, 17,
+        16, 15, 15, 15,
+        /* Size 32 */
+        64, 68, 72, 76, 81, 71, 61, 52, 42, 41, 41, 40, 39, 39, 38, 37, 36, 35,
+        34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 24, 24, 24, 68, 70, 71, 73,
+        74, 66, 58, 50, 42, 42, 41, 41, 41, 40, 40, 39, 38, 37, 36, 35, 34, 33,
+        32, 31, 29, 28, 27, 26, 25, 25, 25, 25, 72, 71, 70, 69, 67, 61, 54, 48,
+        42, 42, 42, 42, 43, 42, 42, 41, 40, 39, 38, 37, 36, 35, 34, 32, 31, 30,
+        29, 28, 27, 27, 27, 27, 76, 73, 69, 65, 61, 56, 51, 46, 41, 42, 43, 44,
+        44, 44, 43, 43, 42, 41, 40, 39, 38, 37, 36, 34, 33, 32, 31, 30, 29, 29,
+        29, 29, 81, 74, 67, 61, 54, 51, 48, 44, 41, 42, 44, 45, 46, 46, 45, 45,
+        44, 43, 42, 41, 40, 39, 37, 36, 35, 34, 32, 31, 30, 30, 30, 30, 71, 66,
+        61, 56, 51, 48, 45, 42, 39, 40, 41, 42, 43, 43, 43, 43, 42, 41, 40, 39,
+        38, 37, 36, 35, 34, 33, 32, 30, 29, 29, 29, 29, 61, 58, 54, 51, 48, 45,
+        43, 40, 38, 38, 39, 40, 41, 41, 40, 40, 40, 39, 39, 38, 37, 36, 35, 34,
+        33, 32, 31, 30, 29, 29, 29, 29, 52, 50, 48, 46, 44, 42, 40, 38, 36, 37,
+        37, 38, 38, 38, 38, 38, 38, 37, 37, 36, 35, 34, 33, 33, 32, 31, 30, 29,
+        28, 28, 28, 28, 42, 42, 42, 41, 41, 39, 38, 36, 34, 35, 35, 35, 36, 36,
+        36, 36, 36, 35, 35, 34, 34, 33, 32, 31, 31, 30, 29, 28, 27, 27, 27, 27,
+        41, 42, 42, 42, 42, 40, 38, 37, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34,
+        33, 33, 32, 32, 31, 30, 29, 29, 28, 27, 26, 26, 26, 26, 41, 41, 42, 43,
+        44, 41, 39, 37, 35, 35, 34, 34, 33, 33, 33, 33, 33, 32, 32, 31, 31, 30,
+        30, 29, 28, 28, 27, 26, 25, 25, 25, 25, 40, 41, 42, 44, 45, 42, 40, 38,
+        35, 35, 34, 33, 32, 32, 32, 31, 31, 30, 30, 30, 29, 29, 28, 28, 27, 26,
+        26, 25, 25, 25, 25, 25, 39, 41, 43, 44, 46, 43, 41, 38, 36, 35, 33, 32,
+        31, 31, 30, 30, 29, 29, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24,
+        24, 24, 39, 40, 42, 44, 46, 43, 41, 38, 36, 34, 33, 32, 31, 30, 29, 29,
+        28, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 23, 23, 23, 23, 23, 38, 40,
+        42, 43, 45, 43, 40, 38, 36, 34, 33, 32, 30, 29, 29, 28, 27, 27, 27, 26,
+        26, 25, 25, 24, 24, 23, 23, 23, 22, 22, 22, 22, 37, 39, 41, 43, 45, 43,
+        40, 38, 36, 34, 33, 31, 30, 29, 28, 27, 27, 26, 26, 25, 25, 24, 24, 23,
+        23, 23, 22, 22, 21, 21, 21, 21, 36, 38, 40, 42, 44, 42, 40, 38, 36, 34,
+        33, 31, 29, 28, 27, 27, 26, 25, 25, 24, 24, 23, 23, 22, 22, 22, 21, 21,
+        20, 20, 20, 20, 35, 37, 39, 41, 43, 41, 39, 37, 35, 34, 32, 30, 29, 28,
+        27, 26, 25, 25, 24, 23, 23, 23, 22, 22, 21, 21, 21, 20, 20, 20, 20, 20,
+        34, 36, 38, 40, 42, 40, 39, 37, 35, 33, 32, 30, 29, 28, 27, 26, 25, 24,
+        23, 23, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 33, 35, 37, 39,
+        41, 39, 38, 36, 34, 33, 31, 30, 28, 27, 26, 25, 24, 23, 23, 22, 22, 21,
+        21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 32, 34, 36, 38, 40, 38, 37, 35,
+        34, 32, 31, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 21, 20, 20, 19, 19,
+        19, 18, 18, 18, 18, 18, 31, 33, 35, 37, 39, 37, 36, 34, 33, 32, 30, 29,
+        27, 26, 25, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17,
+        17, 17, 30, 32, 34, 36, 37, 36, 35, 33, 32, 31, 30, 28, 27, 26, 25, 24,
+        23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 29, 31,
+        32, 34, 36, 35, 34, 33, 31, 30, 29, 28, 26, 25, 24, 23, 22, 22, 21, 20,
+        20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 28, 29, 31, 33, 35, 34,
+        33, 32, 31, 29, 28, 27, 26, 25, 24, 23, 22, 21, 21, 20, 19, 19, 18, 18,
+        17, 17, 17, 16, 16, 16, 16, 16, 27, 28, 30, 32, 34, 33, 32, 31, 30, 29,
+        28, 26, 25, 24, 23, 23, 22, 21, 20, 20, 19, 18, 18, 17, 17, 17, 16, 16,
+        16, 16, 16, 16, 26, 27, 29, 31, 32, 32, 31, 30, 29, 28, 27, 26, 25, 24,
+        23, 22, 21, 21, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15,
+        25, 26, 28, 30, 31, 30, 30, 29, 28, 27, 26, 25, 24, 23, 23, 22, 21, 20,
+        20, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 24, 25, 27, 29,
+        30, 29, 29, 28, 27, 26, 25, 25, 24, 23, 22, 21, 20, 20, 19, 19, 18, 17,
+        17, 16, 16, 16, 15, 15, 15, 15, 15, 15, 24, 25, 27, 29, 30, 29, 29, 28,
+        27, 26, 25, 25, 24, 23, 22, 21, 20, 20, 19, 19, 18, 17, 17, 16, 16, 16,
+        15, 15, 15, 15, 15, 15, 24, 25, 27, 29, 30, 29, 29, 28, 27, 26, 25, 25,
+        24, 23, 22, 21, 20, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15,
+        15, 15, 24, 25, 27, 29, 30, 29, 29, 28, 27, 26, 25, 25, 24, 23, 22, 21,
+        20, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 15 },
+      { /* Intra matrices */
+        /* Size 4 */
+        175, 103, 93, 70, 103, 83, 76, 64, 93, 76, 55, 46, 70, 64, 46, 36,
+        /* Size 8 */
+        162, 205, 104, 97, 90, 78, 67, 57, 205, 136, 102, 115, 111, 99, 86, 73,
+        104, 102, 84, 88, 88, 83, 75, 66, 97, 115, 88, 75, 71, 67, 62, 56, 90,
+        111, 88, 71, 62, 56, 52, 48, 78, 99, 83, 67, 56, 49, 45, 41, 67, 86, 75,
+        62, 52, 45, 40, 36, 57, 73, 66, 56, 48, 41, 36, 33,
+        /* Size 16 */
+        168, 190, 213, 160, 108, 104, 101, 97, 93, 87, 81, 75, 69, 64, 59, 59,
+        190, 184, 177, 142, 107, 108, 110, 107, 104, 98, 92, 85, 79, 73, 67, 67,
+        213, 177, 141, 123, 106, 112, 119, 117, 115, 109, 103, 96, 89, 82, 76,
+        76, 160, 142, 123, 110, 96, 101, 105, 104, 103, 99, 94, 89, 83, 77, 72,
+        72, 108, 107, 106, 96, 87, 89, 91, 91, 91, 89, 86, 82, 77, 73, 68, 68,
+        104, 108, 112, 101, 89, 87, 85, 84, 82, 80, 78, 74, 71, 67, 63, 63, 101,
+        110, 119, 105, 91, 85, 78, 76, 74, 72, 70, 67, 64, 61, 58, 58, 97, 107,
+        117, 104, 91, 84, 76, 72, 69, 66, 64, 62, 59, 57, 54, 54, 93, 104, 115,
+        103, 91, 82, 74, 69, 64, 61, 58, 56, 54, 52, 50, 50, 87, 98, 109, 99,
+        89, 80, 72, 66, 61, 58, 55, 52, 50, 48, 46, 46, 81, 92, 103, 94, 86, 78,
+        70, 64, 58, 55, 51, 49, 46, 45, 43, 43, 75, 85, 96, 89, 82, 74, 67, 62,
+        56, 52, 49, 46, 44, 42, 40, 40, 69, 79, 89, 83, 77, 71, 64, 59, 54, 50,
+        46, 44, 41, 40, 38, 38, 64, 73, 82, 77, 73, 67, 61, 57, 52, 48, 45, 42,
+        40, 38, 36, 36, 59, 67, 76, 72, 68, 63, 58, 54, 50, 46, 43, 40, 38, 36,
+        34, 34, 59, 67, 76, 72, 68, 63, 58, 54, 50, 46, 43, 40, 38, 36, 34, 34,
+        /* Size 32 */
+        171, 182, 194, 205, 217, 190, 163, 137, 110, 108, 106, 104, 103, 101,
+        99, 97, 95, 92, 89, 86, 83, 80, 77, 74, 71, 68, 65, 63, 60, 60, 60, 60,
+        182, 186, 190, 194, 198, 176, 154, 132, 109, 109, 108, 108, 107, 105,
+        104, 102, 100, 97, 94, 91, 88, 85, 82, 79, 76, 73, 70, 67, 64, 64, 64,
+        64, 194, 190, 187, 184, 180, 162, 144, 127, 109, 110, 110, 111, 112,
+        110, 109, 107, 106, 103, 100, 97, 94, 90, 87, 84, 80, 78, 75, 72, 69,
+        69, 69, 69, 205, 194, 184, 173, 162, 148, 135, 122, 108, 110, 112, 114,
+        116, 115, 114, 113, 111, 108, 105, 102, 99, 96, 92, 89, 85, 82, 79, 76,
+        73, 73, 73, 73, 217, 198, 180, 162, 144, 135, 126, 117, 108, 111, 114,
+        118, 121, 120, 119, 118, 117, 114, 111, 108, 104, 101, 97, 94, 90, 87,
+        84, 80, 77, 77, 77, 77, 190, 176, 162, 148, 135, 127, 119, 111, 103,
+        106, 108, 111, 114, 113, 112, 112, 111, 108, 106, 103, 100, 97, 94, 91,
+        87, 84, 81, 78, 75, 75, 75, 75, 163, 154, 144, 135, 126, 119, 112, 105,
+        98, 100, 103, 105, 107, 106, 106, 105, 105, 103, 101, 98, 96, 93, 90,
+        87, 85, 82, 79, 76, 73, 73, 73, 73, 137, 132, 127, 122, 117, 111, 105,
+        99, 94, 95, 97, 98, 100, 100, 99, 99, 99, 97, 95, 94, 92, 89, 87, 84,
+        82, 79, 76, 74, 71, 71, 71, 71, 110, 109, 109, 108, 108, 103, 98, 94,
+        89, 90, 91, 92, 93, 93, 93, 93, 93, 92, 90, 89, 87, 85, 83, 81, 79, 76,
+        74, 72, 69, 69, 69, 69, 108, 109, 110, 110, 111, 106, 100, 95, 90, 90,
+        90, 90, 89, 89, 89, 89, 89, 87, 86, 85, 83, 81, 79, 77, 75, 73, 71, 69,
+        67, 67, 67, 67, 106, 108, 110, 112, 114, 108, 103, 97, 91, 90, 89, 87,
+        86, 86, 85, 85, 84, 83, 82, 80, 79, 77, 76, 74, 72, 70, 68, 66, 64, 64,
+        64, 64, 104, 108, 111, 114, 118, 111, 105, 98, 92, 90, 87, 85, 83, 82,
+        81, 80, 79, 78, 77, 76, 75, 74, 72, 70, 69, 67, 65, 64, 62, 62, 62, 62,
+        103, 107, 112, 116, 121, 114, 107, 100, 93, 89, 86, 83, 80, 78, 77, 76,
+        75, 74, 73, 72, 71, 70, 68, 67, 66, 64, 63, 61, 60, 60, 60, 60, 101,
+        105, 110, 115, 120, 113, 106, 100, 93, 89, 86, 82, 78, 77, 75, 74, 72,
+        71, 70, 69, 68, 67, 65, 64, 63, 62, 60, 59, 57, 57, 57, 57, 99, 104,
+        109, 114, 119, 112, 106, 99, 93, 89, 85, 81, 77, 75, 74, 72, 70, 69, 68,
+        66, 65, 64, 63, 61, 60, 59, 58, 56, 55, 55, 55, 55, 97, 102, 107, 113,
+        118, 112, 105, 99, 93, 89, 85, 80, 76, 74, 72, 70, 67, 66, 65, 64, 62,
+        61, 60, 59, 58, 56, 55, 54, 53, 53, 53, 53, 95, 100, 106, 111, 117, 111,
+        105, 99, 93, 89, 84, 79, 75, 72, 70, 67, 65, 64, 62, 61, 59, 58, 57, 56,
+        55, 54, 53, 52, 51, 51, 51, 51, 92, 97, 103, 108, 114, 108, 103, 97, 92,
+        87, 83, 78, 74, 71, 69, 66, 64, 62, 61, 59, 57, 56, 55, 54, 53, 52, 51,
+        50, 49, 49, 49, 49, 89, 94, 100, 105, 111, 106, 101, 95, 90, 86, 82, 77,
+        73, 70, 68, 65, 62, 61, 59, 57, 56, 55, 53, 52, 51, 50, 49, 48, 47, 47,
+        47, 47, 86, 91, 97, 102, 108, 103, 98, 94, 89, 85, 80, 76, 72, 69, 66,
+        64, 61, 59, 57, 56, 54, 53, 52, 50, 49, 48, 47, 46, 45, 45, 45, 45, 83,
+        88, 94, 99, 104, 100, 96, 92, 87, 83, 79, 75, 71, 68, 65, 62, 59, 57,
+        56, 54, 52, 51, 50, 48, 47, 46, 45, 44, 44, 44, 44, 44, 80, 85, 90, 96,
+        101, 97, 93, 89, 85, 81, 77, 74, 70, 67, 64, 61, 58, 56, 55, 53, 51, 50,
+        48, 47, 46, 45, 44, 43, 42, 42, 42, 42, 77, 82, 87, 92, 97, 94, 90, 87,
+        83, 79, 76, 72, 68, 65, 63, 60, 57, 55, 53, 52, 50, 48, 47, 46, 45, 44,
+        43, 42, 41, 41, 41, 41, 74, 79, 84, 89, 94, 91, 87, 84, 81, 77, 74, 70,
+        67, 64, 61, 59, 56, 54, 52, 50, 48, 47, 46, 45, 43, 42, 42, 41, 40, 40,
+        40, 40, 71, 76, 80, 85, 90, 87, 85, 82, 79, 75, 72, 69, 66, 63, 60, 58,
+        55, 53, 51, 49, 47, 46, 45, 43, 42, 41, 40, 39, 38, 38, 38, 38, 68, 73,
+        78, 82, 87, 84, 82, 79, 76, 73, 70, 67, 64, 62, 59, 56, 54, 52, 50, 48,
+        46, 45, 44, 42, 41, 40, 39, 38, 37, 37, 37, 37, 65, 70, 75, 79, 84, 81,
+        79, 76, 74, 71, 68, 65, 63, 60, 58, 55, 53, 51, 49, 47, 45, 44, 43, 42,
+        40, 39, 38, 37, 37, 37, 37, 37, 63, 67, 72, 76, 80, 78, 76, 74, 72, 69,
+        66, 64, 61, 59, 56, 54, 52, 50, 48, 46, 44, 43, 42, 41, 39, 38, 37, 37,
+        36, 36, 36, 36, 60, 64, 69, 73, 77, 75, 73, 71, 69, 67, 64, 62, 60, 57,
+        55, 53, 51, 49, 47, 45, 44, 42, 41, 40, 38, 37, 37, 36, 35, 35, 35, 35,
+        60, 64, 69, 73, 77, 75, 73, 71, 69, 67, 64, 62, 60, 57, 55, 53, 51, 49,
+        47, 45, 44, 42, 41, 40, 38, 37, 37, 36, 35, 35, 35, 35, 60, 64, 69, 73,
+        77, 75, 73, 71, 69, 67, 64, 62, 60, 57, 55, 53, 51, 49, 47, 45, 44, 42,
+        41, 40, 38, 37, 37, 36, 35, 35, 35, 35, 60, 64, 69, 73, 77, 75, 73, 71,
+        69, 67, 64, 62, 60, 57, 55, 53, 51, 49, 47, 45, 44, 42, 41, 40, 38, 37,
+        37, 36, 35, 35, 35, 35 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 58, 34, 21, 58, 38, 26, 19, 34, 26, 18, 14, 21, 19, 14, 12,
+        /* Size 8 */
+        64, 86, 80, 59, 43, 33, 25, 21, 86, 75, 78, 66, 51, 39, 30, 24, 80, 78,
+        55, 46, 39, 32, 27, 22, 59, 66, 46, 36, 30, 26, 23, 20, 43, 51, 39, 30,
+        25, 22, 19, 17, 33, 39, 32, 26, 22, 19, 17, 16, 25, 30, 27, 23, 19, 17,
+        16, 14, 21, 24, 22, 20, 17, 16, 14, 14,
+        /* Size 16 */
+        64, 75, 86, 83, 80, 70, 59, 51, 43, 38, 33, 29, 25, 23, 21, 21, 75, 78,
+        81, 80, 79, 71, 63, 55, 47, 41, 36, 32, 28, 25, 23, 23, 86, 81, 75, 77,
+        78, 72, 66, 59, 51, 45, 39, 35, 30, 27, 24, 24, 83, 80, 77, 72, 67, 61,
+        56, 51, 45, 40, 36, 32, 29, 26, 23, 23, 80, 79, 78, 67, 55, 51, 46, 43,
+        39, 36, 32, 29, 27, 24, 22, 22, 70, 71, 72, 61, 51, 46, 41, 38, 35, 32,
+        29, 27, 25, 23, 21, 21, 59, 63, 66, 56, 46, 41, 36, 33, 30, 28, 26, 24,
+        23, 21, 20, 20, 51, 55, 59, 51, 43, 38, 33, 30, 28, 26, 24, 22, 21, 20,
+        19, 19, 43, 47, 51, 45, 39, 35, 30, 28, 25, 23, 22, 21, 19, 18, 17, 17,
+        38, 41, 45, 40, 36, 32, 28, 26, 23, 22, 20, 19, 18, 17, 17, 17, 33, 36,
+        39, 36, 32, 29, 26, 24, 22, 20, 19, 18, 17, 16, 16, 16, 29, 32, 35, 32,
+        29, 27, 24, 22, 21, 19, 18, 17, 16, 16, 15, 15, 25, 28, 30, 29, 27, 25,
+        23, 21, 19, 18, 17, 16, 16, 15, 14, 14, 23, 25, 27, 26, 24, 23, 21, 20,
+        18, 17, 16, 16, 15, 14, 14, 14, 21, 23, 24, 23, 22, 21, 20, 19, 17, 17,
+        16, 15, 14, 14, 14, 14, 21, 23, 24, 23, 22, 21, 20, 19, 17, 17, 16, 15,
+        14, 14, 14, 14,
+        /* Size 32 */
+        64, 70, 75, 81, 86, 85, 83, 81, 80, 75, 70, 64, 59, 55, 51, 47, 43, 41,
+        38, 35, 33, 31, 29, 27, 25, 24, 23, 22, 21, 21, 21, 21, 70, 73, 77, 80,
+        84, 82, 81, 80, 79, 75, 70, 66, 61, 57, 53, 49, 45, 43, 40, 37, 34, 32,
+        30, 29, 27, 25, 24, 23, 22, 22, 22, 22, 75, 77, 78, 79, 81, 80, 80, 79,
+        79, 75, 71, 67, 63, 59, 55, 51, 47, 44, 41, 39, 36, 34, 32, 30, 28, 27,
+        25, 24, 23, 23, 23, 23, 81, 80, 79, 78, 78, 78, 78, 78, 79, 75, 72, 68,
+        65, 61, 57, 53, 49, 46, 43, 40, 37, 35, 33, 31, 29, 28, 26, 25, 24, 24,
+        24, 24, 86, 84, 81, 78, 75, 76, 77, 77, 78, 75, 72, 69, 66, 62, 59, 55,
+        51, 48, 45, 42, 39, 37, 35, 33, 30, 29, 27, 26, 24, 24, 24, 24, 85, 82,
+        80, 78, 76, 75, 74, 73, 72, 70, 67, 64, 61, 58, 55, 51, 48, 45, 43, 40,
+        37, 35, 33, 31, 29, 28, 27, 25, 24, 24, 24, 24, 83, 81, 80, 78, 77, 74,
+        72, 69, 67, 64, 61, 59, 56, 54, 51, 48, 45, 43, 40, 38, 36, 34, 32, 30,
+        29, 27, 26, 25, 23, 23, 23, 23, 81, 80, 79, 78, 77, 73, 69, 65, 61, 58,
+        56, 54, 51, 49, 47, 44, 42, 40, 38, 36, 34, 32, 31, 29, 28, 26, 25, 24,
+        23, 23, 23, 23, 80, 79, 79, 79, 78, 72, 67, 61, 55, 53, 51, 48, 46, 45,
+        43, 41, 39, 37, 36, 34, 32, 31, 29, 28, 27, 26, 24, 23, 22, 22, 22, 22,
+        75, 75, 75, 75, 75, 70, 64, 58, 53, 50, 48, 46, 44, 42, 40, 39, 37, 35,
+        34, 32, 31, 29, 28, 27, 26, 25, 24, 23, 22, 22, 22, 22, 70, 70, 71, 72,
+        72, 67, 61, 56, 51, 48, 46, 43, 41, 39, 38, 36, 35, 33, 32, 31, 29, 28,
+        27, 26, 25, 24, 23, 22, 21, 21, 21, 21, 64, 66, 67, 68, 69, 64, 59, 54,
+        48, 46, 43, 41, 38, 37, 35, 34, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23,
+        22, 21, 20, 20, 20, 20, 59, 61, 63, 65, 66, 61, 56, 51, 46, 44, 41, 38,
+        36, 34, 33, 32, 30, 29, 28, 27, 26, 25, 24, 23, 23, 22, 21, 20, 20, 20,
+        20, 20, 55, 57, 59, 61, 62, 58, 54, 49, 45, 42, 39, 37, 34, 33, 32, 30,
+        29, 28, 27, 26, 25, 24, 23, 23, 22, 21, 20, 20, 19, 19, 19, 19, 51, 53,
+        55, 57, 59, 55, 51, 47, 43, 40, 38, 35, 33, 32, 30, 29, 28, 27, 26, 25,
+        24, 23, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 47, 49, 51, 53, 55, 51,
+        48, 44, 41, 39, 36, 34, 32, 30, 29, 28, 26, 25, 24, 24, 23, 22, 21, 21,
+        20, 20, 19, 19, 18, 18, 18, 18, 43, 45, 47, 49, 51, 48, 45, 42, 39, 37,
+        35, 32, 30, 29, 28, 26, 25, 24, 23, 22, 22, 21, 21, 20, 19, 19, 18, 18,
+        17, 17, 17, 17, 41, 43, 44, 46, 48, 45, 43, 40, 37, 35, 33, 31, 29, 28,
+        27, 25, 24, 23, 23, 22, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 17, 17,
+        38, 40, 41, 43, 45, 43, 40, 38, 36, 34, 32, 30, 28, 27, 26, 24, 23, 23,
+        22, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 35, 37, 39, 40,
+        42, 40, 38, 36, 34, 32, 31, 29, 27, 26, 25, 24, 22, 22, 21, 20, 20, 19,
+        19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 33, 34, 36, 37, 39, 37, 36, 34,
+        32, 31, 29, 28, 26, 25, 24, 23, 22, 21, 20, 20, 19, 18, 18, 18, 17, 17,
+        16, 16, 16, 16, 16, 16, 31, 32, 34, 35, 37, 35, 34, 32, 31, 29, 28, 27,
+        25, 24, 23, 22, 21, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, 15, 15,
+        15, 15, 29, 30, 32, 33, 35, 33, 32, 31, 29, 28, 27, 26, 24, 23, 22, 21,
+        21, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 27, 29,
+        30, 31, 33, 31, 30, 29, 28, 27, 26, 25, 23, 23, 22, 21, 20, 19, 19, 18,
+        18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 15, 25, 27, 28, 29, 30, 29,
+        29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 19, 18, 18, 17, 17, 16, 16,
+        16, 15, 15, 15, 14, 14, 14, 14, 24, 25, 27, 28, 29, 28, 27, 26, 26, 25,
+        24, 23, 22, 21, 20, 20, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 14,
+        14, 14, 14, 14, 23, 24, 25, 26, 27, 27, 26, 25, 24, 24, 23, 22, 21, 20,
+        20, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14,
+        22, 23, 24, 25, 26, 25, 25, 24, 23, 23, 22, 21, 20, 20, 19, 19, 18, 17,
+        17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 21, 22, 23, 24,
+        24, 24, 23, 23, 22, 22, 21, 20, 20, 19, 19, 18, 17, 17, 17, 16, 16, 15,
+        15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 21, 22, 23, 24, 24, 24, 23, 23,
+        22, 22, 21, 20, 20, 19, 19, 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14,
+        14, 14, 14, 14, 14, 14, 21, 22, 23, 24, 24, 24, 23, 23, 22, 22, 21, 20,
+        20, 19, 19, 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14,
+        14, 14, 21, 22, 23, 24, 24, 24, 23, 23, 22, 22, 21, 20, 20, 19, 19, 18,
+        17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14 },
+      { /* Intra matrices */
+        /* Size 4 */
+        217, 196, 112, 65, 196, 124, 84, 58, 112, 84, 55, 43, 65, 58, 43, 34,
+        /* Size 8 */
+        180, 246, 226, 167, 120, 88, 67, 53, 246, 212, 222, 187, 142, 107, 81,
+        64, 226, 222, 153, 128, 107, 87, 71, 58, 167, 187, 128, 97, 81, 69, 59,
+        50, 120, 142, 107, 81, 65, 56, 49, 44, 88, 107, 87, 69, 56, 48, 43, 39,
+        67, 81, 71, 59, 49, 43, 38, 35, 53, 64, 58, 50, 44, 39, 35, 32,
+        /* Size 16 */
+        188, 223, 257, 246, 236, 205, 174, 150, 125, 108, 92, 81, 70, 63, 56,
+        56, 223, 231, 239, 237, 234, 209, 185, 161, 137, 119, 102, 90, 78, 70,
+        62, 62, 257, 239, 221, 227, 232, 214, 195, 172, 148, 130, 111, 98, 85,
+        76, 67, 67, 246, 237, 227, 211, 196, 180, 165, 148, 130, 116, 101, 90,
+        80, 72, 64, 64, 236, 234, 232, 196, 160, 147, 134, 123, 112, 102, 91,
+        83, 74, 67, 61, 61, 205, 209, 214, 180, 147, 133, 118, 108, 98, 90, 82,
+        75, 68, 62, 57, 57, 174, 185, 195, 165, 134, 118, 102, 93, 84, 78, 72,
+        67, 61, 57, 53, 53, 150, 161, 172, 148, 123, 108, 93, 85, 76, 71, 65,
+        61, 56, 53, 49, 49, 125, 137, 148, 130, 112, 98, 84, 76, 68, 64, 59, 55,
+        51, 49, 46, 46, 108, 119, 130, 116, 102, 90, 78, 71, 64, 59, 54, 51, 48,
+        46, 43, 43, 92, 102, 111, 101, 91, 82, 72, 65, 59, 54, 50, 47, 45, 42,
+        40, 40, 81, 90, 98, 90, 83, 75, 67, 61, 55, 51, 47, 45, 42, 40, 38, 38,
+        70, 78, 85, 80, 74, 68, 61, 56, 51, 48, 45, 42, 40, 38, 36, 36, 63, 70,
+        76, 72, 67, 62, 57, 53, 49, 46, 42, 40, 38, 37, 35, 35, 56, 62, 67, 64,
+        61, 57, 53, 49, 46, 43, 40, 38, 36, 35, 34, 34, 56, 62, 67, 64, 61, 57,
+        53, 49, 46, 43, 40, 38, 36, 35, 34, 34,
+        /* Size 32 */
+        193, 210, 228, 245, 263, 258, 252, 247, 241, 226, 210, 194, 178, 166,
+        153, 141, 128, 120, 111, 103, 94, 89, 83, 77, 72, 68, 65, 61, 57, 57,
+        57, 57, 210, 221, 232, 243, 254, 250, 247, 244, 240, 226, 212, 198, 184,
+        171, 159, 146, 134, 125, 117, 108, 99, 93, 87, 82, 76, 72, 68, 64, 60,
+        60, 60, 60, 228, 232, 236, 240, 245, 243, 242, 241, 239, 227, 214, 202,
+        189, 177, 165, 152, 140, 131, 122, 113, 104, 98, 92, 86, 80, 75, 71, 67,
+        63, 63, 63, 63, 245, 243, 240, 238, 236, 236, 237, 238, 238, 227, 216,
+        205, 194, 182, 170, 158, 146, 137, 127, 118, 109, 103, 96, 90, 83, 79,
+        75, 70, 66, 66, 66, 66, 263, 254, 245, 236, 227, 229, 232, 235, 238,
+        228, 219, 209, 200, 188, 176, 164, 152, 142, 133, 123, 114, 107, 101,
+        94, 87, 83, 78, 73, 69, 69, 69, 69, 258, 250, 243, 236, 229, 227, 224,
+        222, 219, 210, 202, 193, 184, 174, 163, 153, 143, 134, 126, 117, 109,
+        103, 97, 90, 84, 80, 76, 71, 67, 67, 67, 67, 252, 247, 242, 237, 232,
+        224, 216, 208, 201, 193, 185, 177, 169, 160, 151, 142, 133, 126, 118,
+        111, 104, 98, 93, 87, 81, 77, 73, 69, 65, 65, 65, 65, 247, 244, 241,
+        238, 235, 222, 208, 195, 182, 175, 168, 160, 153, 146, 139, 131, 124,
+        118, 111, 105, 98, 93, 88, 83, 78, 75, 71, 67, 64, 64, 64, 64, 241, 240,
+        239, 238, 238, 219, 201, 182, 164, 157, 151, 144, 137, 132, 126, 120,
+        115, 109, 104, 99, 93, 89, 84, 80, 76, 72, 69, 65, 62, 62, 62, 62, 226,
+        226, 227, 227, 228, 210, 193, 175, 157, 150, 143, 136, 129, 124, 118,
+        113, 108, 103, 98, 93, 88, 84, 80, 76, 72, 69, 66, 63, 60, 60, 60, 60,
+        210, 212, 214, 216, 219, 202, 185, 168, 151, 143, 136, 128, 121, 116,
+        111, 106, 101, 96, 92, 88, 83, 80, 76, 73, 69, 66, 64, 61, 58, 58, 58,
+        58, 194, 198, 202, 205, 209, 193, 177, 160, 144, 136, 128, 120, 112,
+        108, 103, 98, 94, 90, 86, 82, 79, 75, 72, 69, 66, 64, 61, 59, 56, 56,
+        56, 56, 178, 184, 189, 194, 200, 184, 169, 153, 137, 129, 121, 112, 104,
+        100, 95, 91, 86, 83, 80, 77, 74, 71, 68, 66, 63, 61, 58, 56, 54, 54, 54,
+        54, 166, 171, 177, 182, 188, 174, 160, 146, 132, 124, 116, 108, 100, 95,
+        91, 87, 82, 79, 76, 73, 70, 68, 65, 63, 60, 58, 56, 54, 52, 52, 52, 52,
+        153, 159, 165, 170, 176, 163, 151, 139, 126, 118, 111, 103, 95, 91, 87,
+        82, 78, 75, 73, 70, 67, 65, 62, 60, 58, 56, 54, 52, 50, 50, 50, 50, 141,
+        146, 152, 158, 164, 153, 142, 131, 120, 113, 106, 98, 91, 87, 82, 78,
+        74, 71, 69, 66, 63, 61, 59, 57, 55, 54, 52, 50, 49, 49, 49, 49, 128,
+        134, 140, 146, 152, 143, 133, 124, 115, 108, 101, 94, 86, 82, 78, 74,
+        70, 68, 65, 63, 60, 58, 56, 54, 53, 51, 50, 48, 47, 47, 47, 47, 120,
+        125, 131, 137, 142, 134, 126, 118, 109, 103, 96, 90, 83, 79, 75, 71, 68,
+        65, 63, 60, 58, 56, 54, 53, 51, 50, 48, 47, 45, 45, 45, 45, 111, 117,
+        122, 127, 133, 126, 118, 111, 104, 98, 92, 86, 80, 76, 73, 69, 65, 63,
+        60, 58, 56, 54, 52, 51, 49, 48, 47, 45, 44, 44, 44, 44, 103, 108, 113,
+        118, 123, 117, 111, 105, 99, 93, 88, 82, 77, 73, 70, 66, 63, 60, 58, 56,
+        53, 52, 50, 49, 47, 46, 45, 44, 43, 43, 43, 43, 94, 99, 104, 109, 114,
+        109, 104, 98, 93, 88, 83, 79, 74, 70, 67, 63, 60, 58, 56, 53, 51, 50,
+        48, 47, 46, 44, 43, 42, 41, 41, 41, 41, 89, 93, 98, 103, 107, 103, 98,
+        93, 89, 84, 80, 75, 71, 68, 65, 61, 58, 56, 54, 52, 50, 49, 47, 46, 44,
+        43, 42, 41, 40, 40, 40, 40, 83, 87, 92, 96, 101, 97, 93, 88, 84, 80, 76,
+        72, 68, 65, 62, 59, 56, 54, 52, 50, 48, 47, 46, 44, 43, 42, 41, 40, 39,
+        39, 39, 39, 77, 82, 86, 90, 94, 90, 87, 83, 80, 76, 73, 69, 66, 63, 60,
+        57, 54, 53, 51, 49, 47, 46, 44, 43, 42, 41, 40, 39, 38, 38, 38, 38, 72,
+        76, 80, 83, 87, 84, 81, 78, 76, 72, 69, 66, 63, 60, 58, 55, 53, 51, 49,
+        47, 46, 44, 43, 42, 41, 40, 39, 38, 37, 37, 37, 37, 68, 72, 75, 79, 83,
+        80, 77, 75, 72, 69, 66, 64, 61, 58, 56, 54, 51, 50, 48, 46, 44, 43, 42,
+        41, 40, 39, 38, 37, 37, 37, 37, 37, 65, 68, 71, 75, 78, 76, 73, 71, 69,
+        66, 64, 61, 58, 56, 54, 52, 50, 48, 47, 45, 43, 42, 41, 40, 39, 38, 37,
+        37, 36, 36, 36, 36, 61, 64, 67, 70, 73, 71, 69, 67, 65, 63, 61, 59, 56,
+        54, 52, 50, 48, 47, 45, 44, 42, 41, 40, 39, 38, 37, 37, 36, 35, 35, 35,
+        35, 57, 60, 63, 66, 69, 67, 65, 64, 62, 60, 58, 56, 54, 52, 50, 49, 47,
+        45, 44, 43, 41, 40, 39, 38, 37, 37, 36, 35, 34, 34, 34, 34, 57, 60, 63,
+        66, 69, 67, 65, 64, 62, 60, 58, 56, 54, 52, 50, 49, 47, 45, 44, 43, 41,
+        40, 39, 38, 37, 37, 36, 35, 34, 34, 34, 34, 57, 60, 63, 66, 69, 67, 65,
+        64, 62, 60, 58, 56, 54, 52, 50, 49, 47, 45, 44, 43, 41, 40, 39, 38, 37,
+        37, 36, 35, 34, 34, 34, 34, 57, 60, 63, 66, 69, 67, 65, 64, 62, 60, 58,
+        56, 54, 52, 50, 49, 47, 45, 44, 43, 41, 40, 39, 38, 37, 37, 36, 35, 34,
+        34, 34, 34 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 39, 36, 28, 39, 32, 30, 26, 36, 30, 23, 20, 28, 26, 20, 16,
+        /* Size 8 */
+        64, 80, 43, 40, 37, 33, 29, 25, 80, 55, 42, 47, 45, 41, 36, 31, 43, 42,
+        35, 37, 37, 35, 32, 29, 40, 47, 37, 32, 31, 29, 27, 25, 37, 45, 37, 31,
+        27, 25, 24, 22, 33, 41, 35, 29, 25, 23, 21, 20, 29, 36, 32, 27, 24, 21,
+        19, 18, 25, 31, 29, 25, 22, 20, 18, 17,
+        /* Size 16 */
+        64, 72, 80, 61, 43, 41, 40, 39, 37, 35, 33, 31, 29, 27, 25, 25, 72, 70,
+        67, 55, 42, 43, 43, 42, 41, 39, 37, 35, 32, 30, 28, 28, 80, 67, 55, 48,
+        42, 44, 47, 46, 45, 43, 41, 38, 36, 34, 31, 31, 61, 55, 48, 43, 39, 40,
+        42, 41, 41, 39, 38, 36, 34, 32, 30, 30, 43, 42, 42, 39, 35, 36, 37, 37,
+        37, 36, 35, 33, 32, 30, 29, 29, 41, 43, 44, 40, 36, 35, 34, 34, 34, 33,
+        32, 31, 30, 28, 27, 27, 40, 43, 47, 42, 37, 34, 32, 31, 31, 30, 29, 28,
+        27, 26, 25, 25, 39, 42, 46, 41, 37, 34, 31, 30, 29, 28, 27, 26, 25, 25,
+        24, 24, 37, 41, 45, 41, 37, 34, 31, 29, 27, 26, 25, 24, 24, 23, 22, 22,
+        35, 39, 43, 39, 36, 33, 30, 28, 26, 25, 24, 23, 22, 22, 21, 21, 33, 37,
+        41, 38, 35, 32, 29, 27, 25, 24, 23, 22, 21, 20, 20, 20, 31, 35, 38, 36,
+        33, 31, 28, 26, 24, 23, 22, 21, 20, 19, 19, 19, 29, 32, 36, 34, 32, 30,
+        27, 25, 24, 22, 21, 20, 19, 18, 18, 18, 27, 30, 34, 32, 30, 28, 26, 25,
+        23, 22, 20, 19, 18, 18, 17, 17, 25, 28, 31, 30, 29, 27, 25, 24, 22, 21,
+        20, 19, 18, 17, 17, 17, 25, 28, 31, 30, 29, 27, 25, 24, 22, 21, 20, 19,
+        18, 17, 17, 17,
+        /* Size 32 */
+        64, 68, 72, 76, 80, 71, 61, 52, 43, 42, 41, 41, 40, 39, 39, 38, 37, 36,
+        35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 25, 25, 25, 68, 69, 71, 72,
+        74, 66, 58, 50, 43, 42, 42, 42, 42, 41, 41, 40, 39, 38, 37, 36, 35, 34,
+        33, 32, 31, 30, 29, 28, 27, 27, 27, 27, 72, 71, 70, 68, 67, 61, 55, 49,
+        42, 43, 43, 43, 43, 43, 42, 42, 41, 40, 39, 38, 37, 36, 35, 34, 32, 31,
+        30, 29, 28, 28, 28, 28, 76, 72, 68, 65, 61, 56, 52, 47, 42, 43, 44, 44,
+        45, 45, 44, 44, 43, 42, 41, 40, 39, 38, 37, 35, 34, 33, 32, 31, 30, 30,
+        30, 30, 80, 74, 67, 61, 55, 51, 48, 45, 42, 43, 44, 45, 47, 46, 46, 46,
+        45, 44, 43, 42, 41, 40, 38, 37, 36, 35, 34, 33, 31, 31, 31, 31, 71, 66,
+        61, 56, 51, 49, 46, 43, 40, 41, 42, 43, 44, 44, 44, 43, 43, 42, 41, 40,
+        39, 38, 37, 36, 35, 34, 33, 32, 31, 31, 31, 31, 61, 58, 55, 52, 48, 46,
+        43, 41, 39, 39, 40, 41, 42, 42, 41, 41, 41, 40, 39, 39, 38, 37, 36, 35,
+        34, 33, 32, 31, 30, 30, 30, 30, 52, 50, 49, 47, 45, 43, 41, 39, 37, 38,
+        38, 39, 39, 39, 39, 39, 39, 38, 38, 37, 36, 36, 35, 34, 33, 32, 31, 30,
+        29, 29, 29, 29, 43, 43, 42, 42, 42, 40, 39, 37, 35, 36, 36, 36, 37, 37,
+        37, 37, 37, 36, 36, 35, 35, 34, 33, 33, 32, 31, 30, 29, 29, 29, 29, 29,
+        42, 42, 43, 43, 43, 41, 39, 38, 36, 36, 36, 36, 36, 36, 35, 35, 35, 35,
+        34, 34, 33, 33, 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 41, 42, 43, 44,
+        44, 42, 40, 38, 36, 36, 35, 35, 34, 34, 34, 34, 34, 33, 33, 32, 32, 31,
+        31, 30, 30, 29, 28, 28, 27, 27, 27, 27, 41, 42, 43, 44, 45, 43, 41, 39,
+        36, 36, 35, 34, 33, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 29, 28, 28,
+        27, 27, 26, 26, 26, 26, 40, 42, 43, 45, 47, 44, 42, 39, 37, 36, 34, 33,
+        32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 25, 25,
+        25, 25, 39, 41, 43, 45, 46, 44, 42, 39, 37, 36, 34, 33, 32, 31, 31, 30,
+        30, 29, 29, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 24, 39, 41,
+        42, 44, 46, 44, 41, 39, 37, 35, 34, 33, 31, 31, 30, 29, 29, 28, 28, 28,
+        27, 27, 26, 26, 25, 25, 25, 24, 24, 24, 24, 24, 38, 40, 42, 44, 46, 43,
+        41, 39, 37, 35, 34, 32, 31, 30, 29, 29, 28, 27, 27, 27, 26, 26, 25, 25,
+        25, 24, 24, 23, 23, 23, 23, 23, 37, 39, 41, 43, 45, 43, 41, 39, 37, 35,
+        34, 32, 31, 30, 29, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 22,
+        22, 22, 22, 22, 36, 38, 40, 42, 44, 42, 40, 38, 36, 35, 33, 32, 30, 29,
+        28, 27, 27, 26, 26, 25, 24, 24, 24, 23, 23, 23, 22, 22, 21, 21, 21, 21,
+        35, 37, 39, 41, 43, 41, 39, 38, 36, 34, 33, 31, 30, 29, 28, 27, 26, 26,
+        25, 24, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 34, 36, 38, 40,
+        42, 40, 39, 37, 35, 34, 32, 31, 30, 29, 28, 27, 26, 25, 24, 24, 23, 23,
+        22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 33, 35, 37, 39, 41, 39, 38, 36,
+        35, 33, 32, 31, 29, 28, 27, 26, 25, 24, 24, 23, 23, 22, 22, 21, 21, 21,
+        20, 20, 20, 20, 20, 20, 32, 34, 36, 38, 40, 38, 37, 36, 34, 33, 31, 30,
+        29, 28, 27, 26, 25, 24, 23, 23, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19,
+        19, 19, 31, 33, 35, 37, 38, 37, 36, 35, 33, 32, 31, 30, 28, 27, 26, 25,
+        24, 24, 23, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 30, 32,
+        34, 35, 37, 36, 35, 34, 33, 31, 30, 29, 28, 27, 26, 25, 24, 23, 23, 22,
+        21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 29, 31, 32, 34, 36, 35,
+        34, 33, 32, 31, 30, 28, 27, 26, 25, 25, 24, 23, 22, 22, 21, 20, 20, 20,
+        19, 19, 18, 18, 18, 18, 18, 18, 28, 30, 31, 33, 35, 34, 33, 32, 31, 30,
+        29, 28, 27, 26, 25, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18,
+        18, 18, 18, 18, 27, 29, 30, 32, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25,
+        25, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17,
+        26, 28, 29, 31, 33, 32, 31, 30, 29, 28, 28, 27, 26, 25, 24, 23, 22, 22,
+        21, 21, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 25, 27, 28, 30,
+        31, 31, 30, 29, 29, 28, 27, 26, 25, 24, 24, 23, 22, 21, 21, 20, 20, 19,
+        19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 25, 27, 28, 30, 31, 31, 30, 29,
+        29, 28, 27, 26, 25, 24, 24, 23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18,
+        17, 17, 17, 17, 17, 17, 25, 27, 28, 30, 31, 31, 30, 29, 29, 28, 27, 26,
+        25, 24, 24, 23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17,
+        17, 17, 25, 27, 28, 30, 31, 31, 30, 29, 29, 28, 27, 26, 25, 24, 24, 23,
+        22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17 },
+      { /* Intra matrices */
+        /* Size 4 */
+        162, 98, 89, 68, 98, 80, 74, 63, 89, 74, 55, 47, 68, 63, 47, 38,
+        /* Size 8 */
+        151, 190, 99, 93, 86, 76, 66, 57, 190, 128, 97, 108, 105, 94, 82, 71,
+        99, 97, 81, 84, 85, 80, 72, 64, 93, 108, 84, 73, 69, 66, 61, 56, 86,
+        105, 85, 69, 61, 56, 52, 49, 76, 94, 80, 66, 56, 50, 46, 43, 66, 82, 72,
+        61, 52, 46, 41, 38, 57, 71, 64, 56, 49, 43, 38, 35,
+        /* Size 16 */
+        156, 176, 196, 149, 102, 99, 96, 92, 89, 83, 78, 73, 68, 63, 58, 58,
+        176, 170, 164, 133, 101, 103, 104, 101, 99, 93, 88, 82, 76, 71, 66, 66,
+        196, 164, 132, 116, 100, 106, 112, 110, 108, 103, 97, 91, 85, 79, 73,
+        73, 149, 133, 116, 104, 92, 96, 99, 99, 98, 94, 90, 85, 80, 75, 70, 70,
+        102, 101, 100, 92, 84, 85, 87, 87, 87, 85, 82, 79, 75, 71, 67, 67, 99,
+        103, 106, 96, 85, 83, 81, 80, 79, 77, 75, 72, 69, 66, 62, 62, 96, 104,
+        112, 99, 87, 81, 76, 74, 71, 70, 68, 66, 63, 61, 58, 58, 92, 101, 110,
+        99, 87, 80, 74, 70, 67, 65, 63, 61, 59, 56, 54, 54, 89, 99, 108, 98, 87,
+        79, 71, 67, 63, 60, 58, 56, 54, 52, 50, 50, 83, 93, 103, 94, 85, 77, 70,
+        65, 60, 57, 55, 53, 51, 49, 47, 47, 78, 88, 97, 90, 82, 75, 68, 63, 58,
+        55, 51, 49, 47, 46, 44, 44, 73, 82, 91, 85, 79, 72, 66, 61, 56, 53, 49,
+        47, 45, 43, 42, 42, 68, 76, 85, 80, 75, 69, 63, 59, 54, 51, 47, 45, 43,
+        41, 39, 39, 63, 71, 79, 75, 71, 66, 61, 56, 52, 49, 46, 43, 41, 39, 38,
+        38, 58, 66, 73, 70, 67, 62, 58, 54, 50, 47, 44, 42, 39, 38, 36, 36, 58,
+        66, 73, 70, 67, 62, 58, 54, 50, 47, 44, 42, 39, 38, 36, 36,
+        /* Size 32 */
+        158, 169, 179, 189, 199, 175, 152, 128, 104, 102, 101, 99, 97, 96, 94,
+        92, 90, 88, 85, 82, 79, 77, 74, 71, 69, 66, 64, 62, 59, 59, 59, 59, 169,
+        172, 176, 179, 183, 163, 143, 123, 103, 103, 102, 102, 101, 100, 98, 97,
+        95, 93, 90, 87, 84, 82, 79, 76, 73, 71, 68, 66, 63, 63, 63, 63, 179,
+        176, 173, 170, 167, 151, 135, 119, 103, 104, 104, 105, 106, 104, 103,
+        102, 100, 97, 95, 92, 89, 86, 83, 81, 78, 75, 72, 70, 67, 67, 67, 67,
+        189, 179, 170, 160, 150, 138, 126, 114, 102, 104, 106, 108, 110, 109,
+        107, 106, 105, 102, 100, 97, 94, 91, 88, 85, 82, 79, 76, 74, 71, 71, 71,
+        71, 199, 183, 167, 150, 134, 126, 118, 110, 102, 105, 108, 111, 114,
+        113, 112, 111, 110, 107, 105, 102, 99, 96, 93, 90, 86, 84, 81, 78, 75,
+        75, 75, 75, 175, 163, 151, 138, 126, 119, 112, 105, 98, 100, 103, 105,
+        107, 107, 106, 106, 105, 102, 100, 98, 95, 92, 90, 87, 84, 81, 78, 76,
+        73, 73, 73, 73, 152, 143, 135, 126, 118, 112, 106, 100, 94, 95, 97, 99,
+        101, 101, 100, 100, 100, 98, 96, 94, 91, 89, 86, 84, 81, 79, 76, 74, 71,
+        71, 71, 71, 128, 123, 119, 114, 110, 105, 100, 95, 89, 91, 92, 94, 95,
+        95, 95, 94, 94, 93, 91, 89, 88, 85, 83, 81, 79, 76, 74, 72, 69, 69, 69,
+        69, 104, 103, 103, 102, 102, 98, 94, 89, 85, 86, 87, 88, 89, 89, 89, 89,
+        89, 88, 86, 85, 84, 82, 80, 78, 76, 74, 72, 70, 68, 68, 68, 68, 102,
+        103, 104, 104, 105, 100, 95, 91, 86, 86, 86, 86, 86, 85, 85, 85, 85, 84,
+        83, 81, 80, 78, 77, 75, 73, 71, 69, 67, 66, 66, 66, 66, 101, 102, 104,
+        106, 108, 103, 97, 92, 87, 86, 85, 84, 83, 82, 82, 81, 81, 80, 79, 78,
+        77, 75, 73, 72, 70, 69, 67, 65, 63, 63, 63, 63, 99, 102, 105, 108, 111,
+        105, 99, 94, 88, 86, 84, 82, 80, 79, 78, 78, 77, 76, 75, 74, 73, 71, 70,
+        69, 67, 66, 64, 63, 61, 61, 61, 61, 97, 101, 106, 110, 114, 107, 101,
+        95, 89, 86, 83, 80, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 64,
+        63, 62, 60, 59, 59, 59, 59, 96, 100, 104, 109, 113, 107, 101, 95, 89,
+        85, 82, 79, 76, 74, 73, 72, 70, 69, 68, 68, 67, 65, 64, 63, 62, 61, 60,
+        58, 57, 57, 57, 57, 94, 98, 103, 107, 112, 106, 100, 95, 89, 85, 82, 78,
+        75, 73, 72, 70, 68, 67, 66, 65, 64, 63, 62, 61, 60, 58, 57, 56, 55, 55,
+        55, 55, 92, 97, 102, 106, 111, 106, 100, 94, 89, 85, 81, 78, 74, 72, 70,
+        68, 66, 65, 64, 63, 61, 60, 59, 58, 57, 56, 55, 54, 53, 53, 53, 53, 90,
+        95, 100, 105, 110, 105, 100, 94, 89, 85, 81, 77, 73, 70, 68, 66, 64, 63,
+        61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 51, 51, 51, 88, 93, 97, 102,
+        107, 102, 98, 93, 88, 84, 80, 76, 72, 69, 67, 65, 63, 61, 60, 59, 57,
+        56, 55, 54, 53, 52, 51, 50, 49, 49, 49, 49, 85, 90, 95, 100, 105, 100,
+        96, 91, 86, 83, 79, 75, 71, 68, 66, 64, 61, 60, 58, 57, 56, 55, 53, 52,
+        51, 51, 50, 49, 48, 48, 48, 48, 82, 87, 92, 97, 102, 98, 94, 89, 85, 81,
+        78, 74, 70, 68, 65, 63, 60, 59, 57, 55, 54, 53, 52, 51, 50, 49, 48, 47,
+        46, 46, 46, 46, 79, 84, 89, 94, 99, 95, 91, 88, 84, 80, 77, 73, 69, 67,
+        64, 61, 59, 57, 56, 54, 52, 51, 50, 49, 48, 47, 46, 46, 45, 45, 45, 45,
+        77, 82, 86, 91, 96, 92, 89, 85, 82, 78, 75, 71, 68, 65, 63, 60, 58, 56,
+        55, 53, 51, 50, 49, 48, 47, 46, 45, 44, 44, 44, 44, 44, 74, 79, 83, 88,
+        93, 90, 86, 83, 80, 77, 73, 70, 67, 64, 62, 59, 57, 55, 53, 52, 50, 49,
+        48, 47, 46, 45, 44, 43, 42, 42, 42, 42, 71, 76, 81, 85, 90, 87, 84, 81,
+        78, 75, 72, 69, 66, 63, 61, 58, 56, 54, 52, 51, 49, 48, 47, 46, 45, 44,
+        43, 42, 41, 41, 41, 41, 69, 73, 78, 82, 86, 84, 81, 79, 76, 73, 70, 67,
+        64, 62, 60, 57, 55, 53, 51, 50, 48, 47, 46, 45, 43, 43, 42, 41, 40, 40,
+        40, 40, 66, 71, 75, 79, 84, 81, 79, 76, 74, 71, 69, 66, 63, 61, 58, 56,
+        54, 52, 51, 49, 47, 46, 45, 44, 43, 42, 41, 40, 39, 39, 39, 39, 64, 68,
+        72, 76, 81, 78, 76, 74, 72, 69, 67, 64, 62, 60, 57, 55, 53, 51, 50, 48,
+        46, 45, 44, 43, 42, 41, 40, 39, 38, 38, 38, 38, 62, 66, 70, 74, 78, 76,
+        74, 72, 70, 67, 65, 63, 60, 58, 56, 54, 52, 50, 49, 47, 46, 44, 43, 42,
+        41, 40, 39, 38, 38, 38, 38, 38, 59, 63, 67, 71, 75, 73, 71, 69, 68, 66,
+        63, 61, 59, 57, 55, 53, 51, 49, 48, 46, 45, 44, 42, 41, 40, 39, 38, 38,
+        37, 37, 37, 37, 59, 63, 67, 71, 75, 73, 71, 69, 68, 66, 63, 61, 59, 57,
+        55, 53, 51, 49, 48, 46, 45, 44, 42, 41, 40, 39, 38, 38, 37, 37, 37, 37,
+        59, 63, 67, 71, 75, 73, 71, 69, 68, 66, 63, 61, 59, 57, 55, 53, 51, 49,
+        48, 46, 45, 44, 42, 41, 40, 39, 38, 38, 37, 37, 37, 37, 59, 63, 67, 71,
+        75, 73, 71, 69, 68, 66, 63, 61, 59, 57, 55, 53, 51, 49, 48, 46, 45, 44,
+        42, 41, 40, 39, 38, 38, 37, 37, 37, 37 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 58, 35, 23, 58, 39, 28, 21, 35, 28, 20, 16, 23, 21, 16, 14,
+        /* Size 8 */
+        64, 85, 79, 60, 44, 34, 27, 23, 85, 74, 78, 66, 52, 40, 32, 26, 79, 78,
+        55, 47, 40, 34, 29, 24, 60, 66, 47, 37, 32, 28, 25, 22, 44, 52, 40, 32,
+        27, 24, 22, 20, 34, 40, 34, 28, 24, 21, 19, 18, 27, 32, 29, 25, 22, 19,
+        18, 17, 23, 26, 24, 22, 20, 18, 17, 16,
+        /* Size 16 */
+        64, 75, 85, 82, 79, 69, 60, 52, 44, 39, 34, 31, 27, 25, 23, 23, 75, 77,
+        80, 79, 78, 71, 63, 55, 48, 43, 37, 33, 30, 27, 25, 25, 85, 80, 74, 76,
+        78, 72, 66, 59, 52, 46, 40, 36, 32, 29, 26, 26, 82, 79, 76, 71, 66, 62,
+        57, 51, 46, 42, 37, 34, 30, 28, 25, 25, 79, 78, 78, 66, 55, 51, 47, 44,
+        40, 37, 34, 31, 29, 26, 24, 24, 69, 71, 72, 62, 51, 47, 42, 39, 36, 33,
+        31, 29, 27, 25, 23, 23, 60, 63, 66, 57, 47, 42, 37, 34, 32, 30, 28, 26,
+        25, 23, 22, 22, 52, 55, 59, 51, 44, 39, 34, 32, 29, 28, 26, 24, 23, 22,
+        21, 21, 44, 48, 52, 46, 40, 36, 32, 29, 27, 25, 24, 23, 22, 21, 20, 20,
+        39, 43, 46, 42, 37, 33, 30, 28, 25, 24, 22, 21, 20, 20, 19, 19, 34, 37,
+        40, 37, 34, 31, 28, 26, 24, 22, 21, 20, 19, 19, 18, 18, 31, 33, 36, 34,
+        31, 29, 26, 24, 23, 21, 20, 19, 19, 18, 17, 17, 27, 30, 32, 30, 29, 27,
+        25, 23, 22, 20, 19, 19, 18, 17, 17, 17, 25, 27, 29, 28, 26, 25, 23, 22,
+        21, 20, 19, 18, 17, 17, 16, 16, 23, 25, 26, 25, 24, 23, 22, 21, 20, 19,
+        18, 17, 17, 16, 16, 16, 23, 25, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+        17, 16, 16, 16,
+        /* Size 32 */
+        64, 69, 75, 80, 85, 84, 82, 80, 79, 74, 69, 64, 60, 56, 52, 48, 44, 42,
+        39, 37, 34, 32, 31, 29, 27, 26, 25, 24, 23, 23, 23, 23, 69, 73, 76, 79,
+        83, 82, 81, 80, 78, 74, 70, 66, 61, 58, 54, 50, 46, 44, 41, 38, 36, 34,
+        32, 30, 29, 27, 26, 25, 24, 24, 24, 24, 75, 76, 77, 79, 80, 79, 79, 79,
+        78, 74, 71, 67, 63, 59, 55, 52, 48, 45, 43, 40, 37, 35, 33, 32, 30, 28,
+        27, 26, 25, 25, 25, 25, 80, 79, 79, 78, 77, 77, 77, 78, 78, 75, 71, 68,
+        65, 61, 57, 54, 50, 47, 44, 41, 39, 37, 35, 33, 31, 30, 28, 27, 26, 26,
+        26, 26, 85, 83, 80, 77, 74, 75, 76, 77, 78, 75, 72, 69, 66, 63, 59, 55,
+        52, 49, 46, 43, 40, 38, 36, 34, 32, 31, 29, 28, 26, 26, 26, 26, 84, 82,
+        79, 77, 75, 74, 74, 73, 72, 69, 67, 64, 61, 58, 55, 52, 49, 46, 44, 41,
+        39, 37, 35, 33, 31, 30, 29, 27, 26, 26, 26, 26, 82, 81, 79, 77, 76, 74,
+        71, 69, 66, 64, 62, 59, 57, 54, 51, 49, 46, 44, 42, 39, 37, 35, 34, 32,
+        30, 29, 28, 27, 25, 25, 25, 25, 80, 80, 79, 78, 77, 73, 69, 65, 61, 59,
+        56, 54, 52, 50, 48, 45, 43, 41, 39, 37, 35, 34, 32, 31, 29, 28, 27, 26,
+        25, 25, 25, 25, 79, 78, 78, 78, 78, 72, 66, 61, 55, 53, 51, 49, 47, 46,
+        44, 42, 40, 39, 37, 36, 34, 33, 31, 30, 29, 27, 26, 25, 24, 24, 24, 24,
+        74, 74, 74, 75, 75, 69, 64, 59, 53, 51, 49, 47, 45, 43, 41, 40, 38, 37,
+        35, 34, 32, 31, 30, 29, 28, 27, 26, 25, 24, 24, 24, 24, 69, 70, 71, 71,
+        72, 67, 62, 56, 51, 49, 47, 44, 42, 41, 39, 38, 36, 35, 33, 32, 31, 30,
+        29, 28, 27, 26, 25, 24, 23, 23, 23, 23, 64, 66, 67, 68, 69, 64, 59, 54,
+        49, 47, 44, 42, 40, 38, 37, 35, 34, 33, 32, 31, 29, 28, 28, 27, 26, 25,
+        24, 23, 23, 23, 23, 23, 60, 61, 63, 65, 66, 61, 57, 52, 47, 45, 42, 40,
+        37, 36, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 25, 24, 23, 23, 22, 22,
+        22, 22, 56, 58, 59, 61, 63, 58, 54, 50, 46, 43, 41, 38, 36, 34, 33, 32,
+        31, 30, 29, 28, 27, 26, 25, 25, 24, 23, 23, 22, 21, 21, 21, 21, 52, 54,
+        55, 57, 59, 55, 51, 48, 44, 41, 39, 37, 34, 33, 32, 31, 29, 28, 28, 27,
+        26, 25, 24, 24, 23, 23, 22, 21, 21, 21, 21, 21, 48, 50, 52, 54, 55, 52,
+        49, 45, 42, 40, 38, 35, 33, 32, 31, 29, 28, 27, 26, 26, 25, 24, 24, 23,
+        22, 22, 21, 21, 20, 20, 20, 20, 44, 46, 48, 50, 52, 49, 46, 43, 40, 38,
+        36, 34, 32, 31, 29, 28, 27, 26, 25, 25, 24, 23, 23, 22, 22, 21, 21, 20,
+        20, 20, 20, 20, 42, 44, 45, 47, 49, 46, 44, 41, 39, 37, 35, 33, 31, 30,
+        28, 27, 26, 25, 25, 24, 23, 23, 22, 22, 21, 21, 20, 20, 19, 19, 19, 19,
+        39, 41, 43, 44, 46, 44, 42, 39, 37, 35, 33, 32, 30, 29, 28, 26, 25, 25,
+        24, 23, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 37, 38, 40, 41,
+        43, 41, 39, 37, 36, 34, 32, 31, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21,
+        21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 34, 36, 37, 39, 40, 39, 37, 35,
+        34, 32, 31, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 21, 20, 20, 19, 19,
+        19, 18, 18, 18, 18, 18, 32, 34, 35, 37, 38, 37, 35, 34, 33, 31, 30, 28,
+        27, 26, 25, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 18, 18,
+        18, 18, 31, 32, 33, 35, 36, 35, 34, 32, 31, 30, 29, 28, 26, 25, 24, 24,
+        23, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 29, 30,
+        32, 33, 34, 33, 32, 31, 30, 29, 28, 27, 25, 25, 24, 23, 22, 22, 21, 20,
+        20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 27, 29, 30, 31, 32, 31,
+        30, 29, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 20, 19, 19, 19, 18,
+        18, 18, 17, 17, 17, 17, 17, 17, 26, 27, 28, 30, 31, 30, 29, 28, 27, 27,
+        26, 25, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17,
+        17, 17, 17, 17, 25, 26, 27, 28, 29, 29, 28, 27, 26, 26, 25, 24, 23, 23,
+        22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16,
+        24, 25, 26, 27, 28, 27, 27, 26, 25, 25, 24, 23, 23, 22, 21, 21, 20, 20,
+        19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 23, 24, 25, 26,
+        26, 26, 25, 25, 24, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18,
+        17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 23, 24, 25, 26, 26, 26, 25, 25,
+        24, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17,
+        16, 16, 16, 16, 16, 16, 23, 24, 25, 26, 26, 26, 25, 25, 24, 24, 23, 23,
+        22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16,
+        16, 16, 23, 24, 25, 26, 26, 26, 25, 25, 24, 24, 23, 23, 22, 21, 21, 20,
+        20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16 },
+      { /* Intra matrices */
+        /* Size 4 */
+        192, 174, 103, 63, 174, 113, 79, 57, 103, 79, 55, 44, 63, 57, 44, 37,
+        /* Size 8 */
+        160, 216, 199, 149, 109, 82, 65, 53, 216, 187, 195, 166, 128, 98, 77,
+        62, 199, 195, 137, 116, 98, 81, 67, 57, 149, 166, 116, 90, 76, 66, 57,
+        50, 109, 128, 98, 76, 63, 55, 49, 45, 82, 98, 81, 66, 55, 48, 44, 40,
+        65, 77, 67, 57, 49, 44, 40, 37, 53, 62, 57, 50, 45, 40, 37, 35,
+        /* Size 16 */
+        167, 195, 224, 216, 207, 181, 155, 134, 113, 99, 85, 76, 67, 61, 55, 55,
+        195, 202, 209, 207, 205, 184, 164, 143, 123, 108, 94, 84, 74, 67, 60,
+        60, 224, 209, 194, 199, 203, 188, 172, 153, 133, 117, 102, 91, 80, 72,
+        65, 65, 216, 207, 199, 186, 173, 160, 147, 132, 118, 106, 93, 84, 75,
+        68, 62, 62, 207, 205, 203, 173, 143, 132, 121, 112, 102, 94, 85, 78, 70,
+        65, 59, 59, 181, 184, 188, 160, 132, 120, 107, 99, 91, 84, 77, 71, 65,
+        60, 56, 56, 155, 164, 172, 147, 121, 107, 94, 86, 79, 74, 69, 64, 60,
+        56, 53, 53, 134, 143, 153, 132, 112, 99, 86, 79, 72, 68, 63, 59, 56, 53,
+        50, 50, 113, 123, 133, 118, 102, 91, 79, 72, 66, 62, 57, 54, 51, 49, 47,
+        47, 99, 108, 117, 106, 94, 84, 74, 68, 62, 58, 54, 51, 48, 46, 44, 44,
+        85, 94, 102, 93, 85, 77, 69, 63, 57, 54, 50, 48, 46, 44, 42, 42, 76, 84,
+        91, 84, 78, 71, 64, 59, 54, 51, 48, 46, 44, 42, 40, 40, 67, 74, 80, 75,
+        70, 65, 60, 56, 51, 48, 46, 44, 42, 40, 39, 39, 61, 67, 72, 68, 65, 60,
+        56, 53, 49, 46, 44, 42, 40, 39, 38, 38, 55, 60, 65, 62, 59, 56, 53, 50,
+        47, 44, 42, 40, 39, 38, 36, 36, 55, 60, 65, 62, 59, 56, 53, 50, 47, 44,
+        42, 40, 39, 38, 36, 36,
+        /* Size 32 */
+        170, 185, 200, 214, 229, 225, 220, 215, 211, 198, 185, 171, 158, 147,
+        137, 126, 116, 109, 101, 94, 87, 83, 78, 73, 69, 66, 62, 59, 56, 56, 56,
+        56, 185, 194, 203, 212, 221, 219, 216, 213, 210, 198, 186, 174, 163,
+        152, 142, 131, 121, 113, 106, 99, 91, 87, 82, 77, 72, 69, 65, 62, 59,
+        59, 59, 59, 200, 203, 207, 210, 214, 213, 212, 210, 209, 199, 188, 178,
+        167, 157, 146, 136, 126, 118, 111, 103, 96, 90, 85, 80, 75, 72, 68, 65,
+        61, 61, 61, 61, 214, 212, 210, 208, 206, 207, 207, 208, 209, 199, 190,
+        181, 172, 161, 151, 141, 131, 123, 115, 108, 100, 94, 89, 84, 78, 75,
+        71, 67, 64, 64, 64, 64, 229, 221, 214, 206, 199, 201, 203, 205, 208,
+        200, 192, 184, 176, 166, 156, 146, 136, 128, 120, 112, 104, 98, 93, 87,
+        81, 78, 74, 70, 66, 66, 66, 66, 225, 219, 213, 207, 201, 199, 197, 194,
+        192, 185, 178, 170, 163, 154, 145, 137, 128, 121, 114, 107, 100, 95, 89,
+        84, 79, 75, 72, 68, 65, 65, 65, 65, 220, 216, 212, 207, 203, 197, 190,
+        183, 177, 170, 163, 157, 150, 142, 135, 128, 120, 114, 108, 102, 95, 91,
+        86, 81, 77, 73, 70, 67, 63, 63, 63, 63, 215, 213, 210, 208, 205, 194,
+        183, 172, 161, 155, 149, 143, 137, 131, 125, 118, 112, 107, 102, 96, 91,
+        87, 83, 78, 74, 71, 68, 65, 62, 62, 62, 62, 211, 210, 209, 209, 208,
+        192, 177, 161, 146, 140, 135, 129, 124, 119, 114, 109, 105, 100, 96, 91,
+        87, 83, 79, 75, 72, 69, 66, 63, 60, 60, 60, 60, 198, 198, 199, 199, 200,
+        185, 170, 155, 140, 134, 128, 123, 117, 112, 108, 103, 99, 95, 91, 87,
+        82, 79, 76, 72, 69, 66, 64, 61, 59, 59, 59, 59, 185, 186, 188, 190, 192,
+        178, 163, 149, 135, 128, 122, 116, 110, 105, 101, 97, 93, 89, 86, 82,
+        78, 75, 72, 69, 66, 64, 62, 59, 57, 57, 57, 57, 171, 174, 178, 181, 184,
+        170, 157, 143, 129, 123, 116, 109, 103, 99, 95, 91, 87, 84, 81, 77, 74,
+        72, 69, 66, 64, 62, 60, 57, 55, 55, 55, 55, 158, 163, 167, 172, 176,
+        163, 150, 137, 124, 117, 110, 103, 96, 92, 88, 85, 81, 78, 75, 73, 70,
+        68, 66, 63, 61, 59, 57, 55, 54, 54, 54, 54, 147, 152, 157, 161, 166,
+        154, 142, 131, 119, 112, 105, 99, 92, 88, 85, 81, 77, 75, 72, 70, 67,
+        65, 63, 61, 59, 57, 55, 54, 52, 52, 52, 52, 137, 142, 146, 151, 156,
+        145, 135, 125, 114, 108, 101, 95, 88, 85, 81, 78, 74, 72, 69, 67, 64,
+        62, 61, 59, 57, 55, 54, 52, 51, 51, 51, 51, 126, 131, 136, 141, 146,
+        137, 128, 118, 109, 103, 97, 91, 85, 81, 78, 74, 71, 68, 66, 64, 61, 60,
+        58, 56, 55, 53, 52, 50, 49, 49, 49, 49, 116, 121, 126, 131, 136, 128,
+        120, 112, 105, 99, 93, 87, 81, 77, 74, 71, 67, 65, 63, 61, 59, 57, 56,
+        54, 52, 51, 50, 49, 48, 48, 48, 48, 109, 113, 118, 123, 128, 121, 114,
+        107, 100, 95, 89, 84, 78, 75, 72, 68, 65, 63, 61, 59, 57, 55, 54, 52,
+        51, 50, 49, 48, 46, 46, 46, 46, 101, 106, 111, 115, 120, 114, 108, 102,
+        96, 91, 86, 81, 75, 72, 69, 66, 63, 61, 59, 57, 55, 54, 52, 51, 49, 48,
+        47, 46, 45, 45, 45, 45, 94, 99, 103, 108, 112, 107, 102, 96, 91, 87, 82,
+        77, 73, 70, 67, 64, 61, 59, 57, 55, 53, 52, 51, 49, 48, 47, 46, 45, 44,
+        44, 44, 44, 87, 91, 96, 100, 104, 100, 95, 91, 87, 82, 78, 74, 70, 67,
+        64, 61, 59, 57, 55, 53, 51, 50, 49, 48, 47, 46, 45, 44, 43, 43, 43, 43,
+        83, 87, 90, 94, 98, 95, 91, 87, 83, 79, 75, 72, 68, 65, 62, 60, 57, 55,
+        54, 52, 50, 49, 48, 47, 45, 45, 44, 43, 42, 42, 42, 42, 78, 82, 85, 89,
+        93, 89, 86, 83, 79, 76, 72, 69, 66, 63, 61, 58, 56, 54, 52, 51, 49, 48,
+        47, 46, 44, 44, 43, 42, 41, 41, 41, 41, 73, 77, 80, 84, 87, 84, 81, 78,
+        75, 72, 69, 66, 63, 61, 59, 56, 54, 52, 51, 49, 48, 47, 46, 45, 43, 43,
+        42, 41, 40, 40, 40, 40, 69, 72, 75, 78, 81, 79, 77, 74, 72, 69, 66, 64,
+        61, 59, 57, 55, 52, 51, 49, 48, 47, 45, 44, 43, 42, 42, 41, 40, 40, 40,
+        40, 40, 66, 69, 72, 75, 78, 75, 73, 71, 69, 66, 64, 62, 59, 57, 55, 53,
+        51, 50, 48, 47, 46, 45, 44, 43, 42, 41, 40, 40, 39, 39, 39, 39, 62, 65,
+        68, 71, 74, 72, 70, 68, 66, 64, 62, 60, 57, 55, 54, 52, 50, 49, 47, 46,
+        45, 44, 43, 42, 41, 40, 40, 39, 38, 38, 38, 38, 59, 62, 65, 67, 70, 68,
+        67, 65, 63, 61, 59, 57, 55, 54, 52, 50, 49, 48, 46, 45, 44, 43, 42, 41,
+        40, 40, 39, 38, 38, 38, 38, 38, 56, 59, 61, 64, 66, 65, 63, 62, 60, 59,
+        57, 55, 54, 52, 51, 49, 48, 46, 45, 44, 43, 42, 41, 40, 40, 39, 38, 38,
+        37, 37, 37, 37, 56, 59, 61, 64, 66, 65, 63, 62, 60, 59, 57, 55, 54, 52,
+        51, 49, 48, 46, 45, 44, 43, 42, 41, 40, 40, 39, 38, 38, 37, 37, 37, 37,
+        56, 59, 61, 64, 66, 65, 63, 62, 60, 59, 57, 55, 54, 52, 51, 49, 48, 46,
+        45, 44, 43, 42, 41, 40, 40, 39, 38, 38, 37, 37, 37, 37, 56, 59, 61, 64,
+        66, 65, 63, 62, 60, 59, 57, 55, 54, 52, 51, 49, 48, 46, 45, 44, 43, 42,
+        41, 40, 40, 39, 38, 38, 37, 37, 37, 37 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 40, 37, 29, 40, 34, 32, 27, 37, 32, 25, 22, 29, 27, 22, 18,
+        /* Size 8 */
+        64, 79, 44, 41, 39, 35, 31, 27, 79, 55, 43, 47, 46, 42, 37, 33, 44, 43,
+        37, 38, 38, 36, 33, 30, 41, 47, 38, 34, 32, 31, 29, 27, 39, 46, 38, 32,
+        29, 27, 25, 24, 35, 42, 36, 31, 27, 24, 23, 21, 31, 37, 33, 29, 25, 23,
+        21, 20, 27, 33, 30, 27, 24, 21, 20, 19,
+        /* Size 16 */
+        64, 72, 79, 62, 44, 42, 41, 40, 39, 37, 35, 33, 31, 29, 27, 27, 72, 69,
+        67, 55, 43, 44, 44, 43, 42, 40, 38, 36, 34, 32, 30, 30, 79, 67, 55, 49,
+        43, 45, 47, 47, 46, 44, 42, 39, 37, 35, 33, 33, 62, 55, 49, 44, 40, 41,
+        43, 42, 42, 41, 39, 37, 35, 33, 31, 31, 44, 43, 43, 40, 37, 37, 38, 38,
+        38, 37, 36, 35, 33, 32, 30, 30, 42, 44, 45, 41, 37, 37, 36, 35, 35, 34,
+        33, 32, 31, 30, 28, 28, 41, 44, 47, 43, 38, 36, 34, 33, 32, 31, 31, 30,
+        29, 28, 27, 27, 40, 43, 47, 42, 38, 35, 33, 32, 30, 29, 29, 28, 27, 26,
+        25, 25, 39, 42, 46, 42, 38, 35, 32, 30, 29, 28, 27, 26, 25, 25, 24, 24,
+        37, 40, 44, 41, 37, 34, 31, 29, 28, 27, 26, 25, 24, 23, 23, 23, 35, 38,
+        42, 39, 36, 33, 31, 29, 27, 26, 24, 24, 23, 22, 21, 21, 33, 36, 39, 37,
+        35, 32, 30, 28, 26, 25, 24, 23, 22, 21, 21, 21, 31, 34, 37, 35, 33, 31,
+        29, 27, 25, 24, 23, 22, 21, 20, 20, 20, 29, 32, 35, 33, 32, 30, 28, 26,
+        25, 23, 22, 21, 20, 20, 19, 19, 27, 30, 33, 31, 30, 28, 27, 25, 24, 23,
+        21, 21, 20, 19, 19, 19, 27, 30, 33, 31, 30, 28, 27, 25, 24, 23, 21, 21,
+        20, 19, 19, 19,
+        /* Size 32 */
+        64, 68, 72, 75, 79, 70, 62, 53, 44, 43, 42, 42, 41, 41, 40, 39, 39, 38,
+        37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 27, 27, 27, 68, 69, 71, 72,
+        73, 66, 58, 51, 43, 43, 43, 43, 43, 42, 42, 41, 40, 39, 38, 37, 36, 35,
+        34, 33, 32, 31, 30, 29, 28, 28, 28, 28, 72, 71, 69, 68, 67, 61, 55, 49,
+        43, 44, 44, 44, 44, 44, 43, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
+        32, 31, 30, 30, 30, 30, 75, 72, 68, 65, 61, 57, 52, 48, 43, 44, 44, 45,
+        46, 45, 45, 45, 44, 43, 42, 41, 40, 39, 38, 37, 35, 34, 33, 32, 31, 31,
+        31, 31, 79, 73, 67, 61, 55, 52, 49, 46, 43, 44, 45, 46, 47, 47, 47, 46,
+        46, 45, 44, 43, 42, 41, 39, 38, 37, 36, 35, 34, 33, 33, 33, 33, 70, 66,
+        61, 57, 52, 49, 47, 44, 41, 42, 43, 44, 45, 45, 44, 44, 44, 43, 42, 41,
+        40, 39, 38, 37, 36, 35, 34, 33, 32, 32, 32, 32, 62, 58, 55, 52, 49, 47,
+        44, 42, 40, 40, 41, 42, 43, 42, 42, 42, 42, 41, 41, 40, 39, 38, 37, 36,
+        35, 34, 33, 32, 31, 31, 31, 31, 53, 51, 49, 48, 46, 44, 42, 40, 38, 39,
+        39, 40, 40, 40, 40, 40, 40, 39, 39, 38, 38, 37, 36, 35, 34, 33, 32, 32,
+        31, 31, 31, 31, 44, 43, 43, 43, 43, 41, 40, 38, 37, 37, 37, 38, 38, 38,
+        38, 38, 38, 38, 37, 37, 36, 35, 35, 34, 33, 32, 32, 31, 30, 30, 30, 30,
+        43, 43, 44, 44, 44, 42, 40, 39, 37, 37, 37, 37, 37, 37, 37, 37, 37, 36,
+        36, 35, 35, 34, 33, 33, 32, 31, 31, 30, 29, 29, 29, 29, 42, 43, 44, 44,
+        45, 43, 41, 39, 37, 37, 37, 36, 36, 36, 35, 35, 35, 35, 34, 34, 33, 33,
+        32, 32, 31, 30, 30, 29, 28, 28, 28, 28, 42, 43, 44, 45, 46, 44, 42, 40,
+        38, 37, 36, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32, 31, 31, 30, 30, 29,
+        29, 28, 28, 28, 28, 28, 41, 43, 44, 46, 47, 45, 43, 40, 38, 37, 36, 35,
+        34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27,
+        27, 27, 41, 42, 44, 45, 47, 45, 42, 40, 38, 37, 36, 34, 33, 33, 32, 32,
+        31, 31, 30, 30, 30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 26, 40, 42,
+        43, 45, 47, 44, 42, 40, 38, 37, 35, 34, 33, 32, 32, 31, 30, 30, 29, 29,
+        29, 28, 28, 27, 27, 27, 26, 26, 25, 25, 25, 25, 39, 41, 43, 45, 46, 44,
+        42, 40, 38, 37, 35, 34, 32, 32, 31, 30, 29, 29, 29, 28, 28, 27, 27, 27,
+        26, 26, 25, 25, 25, 25, 25, 25, 39, 40, 42, 44, 46, 44, 42, 40, 38, 37,
+        35, 33, 32, 31, 30, 29, 29, 28, 28, 27, 27, 26, 26, 26, 25, 25, 25, 24,
+        24, 24, 24, 24, 38, 39, 41, 43, 45, 43, 41, 39, 38, 36, 35, 33, 32, 31,
+        30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 25, 24, 24, 24, 23, 23, 23, 23,
+        37, 38, 40, 42, 44, 42, 41, 39, 37, 36, 34, 33, 31, 30, 29, 29, 28, 27,
+        27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 23, 23, 23, 36, 37, 39, 41,
+        43, 41, 40, 38, 37, 35, 34, 32, 31, 30, 29, 28, 27, 27, 26, 26, 25, 25,
+        24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 35, 36, 38, 40, 42, 40, 39, 38,
+        36, 35, 33, 32, 31, 30, 29, 28, 27, 26, 26, 25, 24, 24, 24, 23, 23, 22,
+        22, 22, 21, 21, 21, 21, 34, 35, 37, 39, 41, 39, 38, 37, 35, 34, 33, 31,
+        30, 29, 28, 27, 26, 26, 25, 25, 24, 24, 23, 23, 22, 22, 22, 21, 21, 21,
+        21, 21, 33, 34, 36, 38, 39, 38, 37, 36, 35, 33, 32, 31, 30, 29, 28, 27,
+        26, 25, 25, 24, 24, 23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 21, 32, 33,
+        35, 37, 38, 37, 36, 35, 34, 33, 32, 30, 29, 28, 27, 27, 26, 25, 24, 24,
+        23, 23, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 31, 32, 34, 35, 37, 36,
+        35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 25, 24, 23, 23, 22, 22, 21,
+        21, 21, 20, 20, 20, 20, 20, 20, 30, 31, 33, 34, 36, 35, 34, 33, 32, 31,
+        30, 29, 28, 27, 27, 26, 25, 24, 24, 23, 22, 22, 22, 21, 21, 20, 20, 20,
+        19, 19, 19, 19, 29, 30, 32, 33, 35, 34, 33, 32, 32, 31, 30, 29, 28, 27,
+        26, 25, 25, 24, 23, 23, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19,
+        28, 29, 31, 32, 34, 33, 32, 32, 31, 30, 29, 28, 27, 27, 26, 25, 24, 24,
+        23, 22, 22, 21, 21, 21, 20, 20, 19, 19, 19, 19, 19, 19, 27, 28, 30, 31,
+        33, 32, 31, 31, 30, 29, 28, 28, 27, 26, 25, 25, 24, 23, 23, 22, 21, 21,
+        21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 27, 28, 30, 31, 33, 32, 31, 31,
+        30, 29, 28, 28, 27, 26, 25, 25, 24, 23, 23, 22, 21, 21, 21, 20, 20, 19,
+        19, 19, 19, 19, 19, 19, 27, 28, 30, 31, 33, 32, 31, 31, 30, 29, 28, 28,
+        27, 26, 25, 25, 24, 23, 23, 22, 21, 21, 21, 20, 20, 19, 19, 19, 19, 19,
+        19, 19, 27, 28, 30, 31, 33, 32, 31, 31, 30, 29, 28, 28, 27, 26, 25, 25,
+        24, 23, 23, 22, 21, 21, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19 },
+      { /* Intra matrices */
+        /* Size 4 */
+        152, 94, 86, 67, 94, 77, 72, 62, 86, 72, 55, 48, 67, 62, 48, 40,
+        /* Size 8 */
+        141, 176, 95, 89, 83, 74, 65, 57, 176, 120, 93, 103, 100, 90, 80, 70,
+        95, 93, 79, 81, 82, 77, 71, 64, 89, 103, 81, 71, 68, 65, 61, 56, 83,
+        100, 82, 68, 60, 56, 53, 49, 74, 90, 77, 65, 56, 50, 47, 44, 65, 80, 71,
+        61, 53, 47, 43, 40, 57, 70, 64, 56, 49, 44, 40, 37,
+        /* Size 16 */
+        145, 163, 181, 139, 97, 94, 91, 88, 85, 81, 76, 71, 66, 62, 58, 58, 163,
+        158, 152, 124, 96, 98, 99, 96, 94, 89, 84, 79, 74, 70, 65, 65, 181, 152,
+        124, 110, 95, 101, 106, 104, 103, 98, 93, 87, 82, 77, 72, 72, 139, 124,
+        110, 99, 88, 91, 95, 94, 93, 90, 86, 82, 77, 73, 69, 69, 97, 96, 95, 88,
+        81, 82, 84, 84, 84, 82, 80, 76, 73, 69, 65, 65, 94, 98, 101, 91, 82, 80,
+        79, 78, 77, 75, 73, 70, 68, 65, 62, 62, 91, 99, 106, 95, 84, 79, 74, 72,
+        70, 68, 67, 65, 63, 60, 58, 58, 88, 96, 104, 94, 84, 78, 72, 69, 66, 64,
+        62, 60, 58, 56, 54, 54, 85, 94, 103, 93, 84, 77, 70, 66, 62, 60, 58, 56,
+        54, 52, 51, 51, 81, 89, 98, 90, 82, 75, 68, 64, 60, 57, 55, 53, 51, 50,
+        48, 48, 76, 84, 93, 86, 80, 73, 67, 62, 58, 55, 52, 50, 48, 47, 45, 45,
+        71, 79, 87, 82, 76, 70, 65, 60, 56, 53, 50, 48, 46, 45, 43, 43, 66, 74,
+        82, 77, 73, 68, 63, 58, 54, 51, 48, 46, 44, 43, 41, 41, 62, 70, 77, 73,
+        69, 65, 60, 56, 52, 50, 47, 45, 43, 41, 40, 40, 58, 65, 72, 69, 65, 62,
+        58, 54, 51, 48, 45, 43, 41, 40, 38, 38, 58, 65, 72, 69, 65, 62, 58, 54,
+        51, 48, 45, 43, 41, 40, 38, 38,
+        /* Size 32 */
+        147, 156, 165, 175, 184, 163, 141, 120, 99, 97, 96, 94, 93, 91, 90, 88,
+        87, 84, 82, 79, 77, 75, 72, 70, 67, 65, 63, 61, 59, 59, 59, 59, 156,
+        160, 163, 166, 169, 151, 134, 116, 98, 98, 97, 97, 97, 95, 94, 92, 91,
+        89, 86, 84, 81, 79, 76, 74, 71, 69, 67, 65, 62, 62, 62, 62, 165, 163,
+        160, 157, 155, 140, 126, 112, 98, 98, 99, 100, 100, 99, 98, 97, 95, 93,
+        91, 88, 86, 83, 80, 78, 75, 73, 71, 68, 66, 66, 66, 66, 175, 166, 157,
+        149, 140, 129, 119, 108, 97, 99, 101, 102, 104, 103, 102, 101, 100, 97,
+        95, 93, 90, 87, 85, 82, 79, 77, 74, 72, 69, 69, 69, 69, 184, 169, 155,
+        140, 126, 118, 111, 104, 97, 100, 102, 105, 108, 107, 106, 105, 104,
+        102, 99, 97, 94, 92, 89, 86, 83, 81, 78, 75, 73, 73, 73, 73, 163, 151,
+        140, 129, 118, 112, 106, 99, 93, 95, 98, 100, 102, 101, 101, 100, 100,
+        97, 95, 93, 91, 88, 86, 83, 81, 78, 76, 74, 71, 71, 71, 71, 141, 134,
+        126, 119, 111, 106, 100, 95, 89, 91, 93, 95, 96, 96, 96, 95, 95, 93, 91,
+        89, 88, 85, 83, 81, 79, 76, 74, 72, 70, 70, 70, 70, 120, 116, 112, 108,
+        104, 99, 95, 90, 86, 87, 88, 89, 91, 91, 90, 90, 90, 89, 87, 86, 84, 82,
+        80, 78, 76, 74, 72, 70, 68, 68, 68, 68, 99, 98, 98, 97, 97, 93, 89, 86,
+        82, 83, 84, 84, 85, 85, 85, 85, 85, 84, 83, 82, 81, 79, 77, 76, 74, 72,
+        70, 68, 66, 66, 66, 66, 97, 98, 98, 99, 100, 95, 91, 87, 83, 83, 83, 83,
+        82, 82, 82, 82, 82, 81, 80, 79, 78, 76, 74, 73, 71, 70, 68, 66, 64, 64,
+        64, 64, 96, 97, 99, 101, 102, 98, 93, 88, 84, 83, 82, 81, 80, 79, 79,
+        79, 78, 77, 76, 75, 74, 73, 71, 70, 69, 67, 66, 64, 63, 63, 63, 63, 94,
+        97, 100, 102, 105, 100, 95, 89, 84, 83, 81, 79, 77, 77, 76, 75, 75, 74,
+        73, 72, 71, 70, 69, 67, 66, 65, 63, 62, 61, 61, 61, 61, 93, 97, 100,
+        104, 108, 102, 96, 91, 85, 82, 80, 77, 75, 74, 73, 72, 71, 70, 69, 68,
+        68, 67, 66, 65, 64, 62, 61, 60, 59, 59, 59, 59, 91, 95, 99, 103, 107,
+        101, 96, 91, 85, 82, 79, 77, 74, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63,
+        62, 61, 60, 59, 58, 57, 57, 57, 57, 90, 94, 98, 102, 106, 101, 96, 90,
+        85, 82, 79, 76, 73, 71, 70, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58,
+        57, 56, 55, 55, 55, 55, 88, 92, 97, 101, 105, 100, 95, 90, 85, 82, 79,
+        75, 72, 70, 68, 67, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53,
+        53, 53, 53, 87, 91, 95, 100, 104, 100, 95, 90, 85, 82, 78, 75, 71, 69,
+        67, 65, 63, 62, 61, 60, 58, 58, 57, 56, 55, 54, 53, 52, 52, 52, 52, 52,
+        84, 89, 93, 97, 102, 97, 93, 89, 84, 81, 77, 74, 70, 68, 66, 64, 62, 61,
+        59, 58, 57, 56, 55, 54, 53, 53, 52, 51, 50, 50, 50, 50, 82, 86, 91, 95,
+        99, 95, 91, 87, 83, 80, 76, 73, 69, 67, 65, 63, 61, 59, 58, 57, 56, 55,
+        54, 53, 52, 51, 50, 50, 49, 49, 49, 49, 79, 84, 88, 93, 97, 93, 89, 86,
+        82, 79, 75, 72, 68, 66, 64, 62, 60, 58, 57, 56, 54, 53, 52, 51, 50, 50,
+        49, 48, 47, 47, 47, 47, 77, 81, 86, 90, 94, 91, 88, 84, 81, 78, 74, 71,
+        68, 65, 63, 61, 58, 57, 56, 54, 53, 52, 51, 50, 49, 48, 47, 47, 46, 46,
+        46, 46, 75, 79, 83, 87, 92, 88, 85, 82, 79, 76, 73, 70, 67, 64, 62, 60,
+        58, 56, 55, 53, 52, 51, 50, 49, 48, 47, 46, 46, 45, 45, 45, 45, 72, 76,
+        80, 85, 89, 86, 83, 80, 77, 74, 71, 69, 66, 63, 61, 59, 57, 55, 54, 52,
+        51, 50, 49, 48, 47, 46, 45, 45, 44, 44, 44, 44, 70, 74, 78, 82, 86, 83,
+        81, 78, 76, 73, 70, 67, 65, 62, 60, 58, 56, 54, 53, 51, 50, 49, 48, 47,
+        46, 45, 44, 44, 43, 43, 43, 43, 67, 71, 75, 79, 83, 81, 79, 76, 74, 71,
+        69, 66, 64, 61, 59, 57, 55, 53, 52, 50, 49, 48, 47, 46, 45, 44, 43, 43,
+        42, 42, 42, 42, 65, 69, 73, 77, 81, 78, 76, 74, 72, 70, 67, 65, 62, 60,
+        58, 56, 54, 53, 51, 50, 48, 47, 46, 45, 44, 43, 43, 42, 41, 41, 41, 41,
+        63, 67, 71, 74, 78, 76, 74, 72, 70, 68, 66, 63, 61, 59, 57, 55, 53, 52,
+        50, 49, 47, 46, 45, 44, 43, 43, 42, 41, 40, 40, 40, 40, 61, 65, 68, 72,
+        75, 74, 72, 70, 68, 66, 64, 62, 60, 58, 56, 54, 52, 51, 50, 48, 47, 46,
+        45, 44, 43, 42, 41, 40, 40, 40, 40, 40, 59, 62, 66, 69, 73, 71, 70, 68,
+        66, 64, 63, 61, 59, 57, 55, 53, 52, 50, 49, 47, 46, 45, 44, 43, 42, 41,
+        40, 40, 39, 39, 39, 39, 59, 62, 66, 69, 73, 71, 70, 68, 66, 64, 63, 61,
+        59, 57, 55, 53, 52, 50, 49, 47, 46, 45, 44, 43, 42, 41, 40, 40, 39, 39,
+        39, 39, 59, 62, 66, 69, 73, 71, 70, 68, 66, 64, 63, 61, 59, 57, 55, 53,
+        52, 50, 49, 47, 46, 45, 44, 43, 42, 41, 40, 40, 39, 39, 39, 39, 59, 62,
+        66, 69, 73, 71, 70, 68, 66, 64, 63, 61, 59, 57, 55, 53, 52, 50, 49, 47,
+        46, 45, 44, 43, 42, 41, 40, 40, 39, 39, 39, 39 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 59, 37, 24, 59, 40, 29, 23, 37, 29, 22, 19, 24, 23, 19, 16,
+        /* Size 8 */
+        64, 84, 78, 60, 45, 36, 29, 25, 84, 74, 77, 66, 52, 41, 34, 28, 78, 77,
+        56, 48, 42, 36, 30, 27, 60, 66, 48, 39, 34, 30, 27, 24, 45, 52, 42, 34,
+        29, 26, 24, 22, 36, 41, 36, 30, 26, 23, 22, 21, 29, 34, 30, 27, 24, 22,
+        20, 19, 25, 28, 27, 24, 22, 21, 19, 19,
+        /* Size 16 */
+        64, 74, 84, 81, 78, 69, 60, 53, 45, 41, 36, 33, 29, 27, 25, 25, 74, 77,
+        79, 78, 77, 70, 63, 56, 49, 44, 39, 35, 32, 29, 27, 27, 84, 79, 74, 75,
+        77, 71, 66, 59, 52, 47, 41, 38, 34, 31, 28, 28, 81, 78, 75, 71, 66, 62,
+        57, 52, 47, 43, 38, 35, 32, 30, 28, 28, 78, 77, 77, 66, 56, 52, 48, 45,
+        42, 39, 36, 33, 30, 29, 27, 27, 69, 70, 71, 62, 52, 48, 43, 40, 38, 35,
+        33, 31, 29, 27, 25, 25, 60, 63, 66, 57, 48, 43, 39, 36, 34, 32, 30, 28,
+        27, 26, 24, 24, 53, 56, 59, 52, 45, 40, 36, 34, 31, 30, 28, 27, 25, 24,
+        23, 23, 45, 49, 52, 47, 42, 38, 34, 31, 29, 27, 26, 25, 24, 23, 22, 22,
+        41, 44, 47, 43, 39, 35, 32, 30, 27, 26, 25, 24, 23, 22, 21, 21, 36, 39,
+        41, 38, 36, 33, 30, 28, 26, 25, 23, 23, 22, 21, 21, 21, 33, 35, 38, 35,
+        33, 31, 28, 27, 25, 24, 23, 22, 21, 21, 20, 20, 29, 32, 34, 32, 30, 29,
+        27, 25, 24, 23, 22, 21, 20, 20, 19, 19, 27, 29, 31, 30, 29, 27, 26, 24,
+        23, 22, 21, 21, 20, 20, 19, 19, 25, 27, 28, 28, 27, 25, 24, 23, 22, 21,
+        21, 20, 19, 19, 19, 19, 25, 27, 28, 28, 27, 25, 24, 23, 22, 21, 21, 20,
+        19, 19, 19, 19,
+        /* Size 32 */
+        64, 69, 74, 79, 84, 83, 81, 80, 78, 73, 69, 64, 60, 56, 53, 49, 45, 43,
+        41, 38, 36, 34, 33, 31, 29, 28, 27, 26, 25, 25, 25, 25, 69, 72, 75, 78,
+        82, 81, 80, 79, 78, 74, 70, 66, 61, 58, 54, 51, 47, 45, 42, 40, 37, 35,
+        34, 32, 30, 29, 28, 27, 26, 26, 26, 26, 74, 75, 77, 78, 79, 79, 78, 78,
+        77, 74, 70, 67, 63, 59, 56, 52, 49, 46, 44, 41, 39, 37, 35, 33, 32, 30,
+        29, 28, 27, 27, 27, 27, 79, 78, 78, 77, 76, 77, 77, 77, 77, 74, 71, 68,
+        65, 61, 58, 54, 51, 48, 45, 43, 40, 38, 36, 35, 33, 31, 30, 29, 28, 28,
+        28, 28, 84, 82, 79, 76, 74, 75, 75, 76, 77, 74, 71, 69, 66, 63, 59, 56,
+        52, 50, 47, 44, 41, 40, 38, 36, 34, 32, 31, 30, 28, 28, 28, 28, 83, 81,
+        79, 77, 75, 74, 73, 72, 72, 69, 67, 64, 62, 59, 56, 53, 50, 47, 45, 42,
+        40, 38, 36, 35, 33, 32, 30, 29, 28, 28, 28, 28, 81, 80, 78, 77, 75, 73,
+        71, 69, 66, 64, 62, 59, 57, 55, 52, 50, 47, 45, 43, 41, 38, 37, 35, 34,
+        32, 31, 30, 29, 28, 28, 28, 28, 80, 79, 78, 77, 76, 72, 69, 65, 61, 59,
+        57, 55, 53, 51, 48, 46, 44, 43, 41, 39, 37, 36, 34, 33, 31, 30, 29, 28,
+        27, 27, 27, 27, 78, 78, 77, 77, 77, 72, 66, 61, 56, 54, 52, 50, 48, 47,
+        45, 43, 42, 40, 39, 37, 36, 34, 33, 32, 30, 29, 29, 28, 27, 27, 27, 27,
+        73, 74, 74, 74, 74, 69, 64, 59, 54, 52, 50, 48, 46, 44, 43, 41, 40, 38,
+        37, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 26, 26, 26, 69, 70, 70, 71,
+        71, 67, 62, 57, 52, 50, 48, 46, 43, 42, 40, 39, 38, 36, 35, 34, 33, 32,
+        31, 30, 29, 28, 27, 26, 25, 25, 25, 25, 64, 66, 67, 68, 69, 64, 59, 55,
+        50, 48, 46, 43, 41, 40, 38, 37, 36, 35, 33, 32, 31, 30, 30, 29, 28, 27,
+        26, 26, 25, 25, 25, 25, 60, 61, 63, 65, 66, 62, 57, 53, 48, 46, 43, 41,
+        39, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 28, 27, 26, 26, 25, 24, 24,
+        24, 24, 56, 58, 59, 61, 63, 59, 55, 51, 47, 44, 42, 40, 37, 36, 35, 34,
+        32, 32, 31, 30, 29, 28, 27, 27, 26, 25, 25, 24, 24, 24, 24, 24, 53, 54,
+        56, 58, 59, 56, 52, 48, 45, 43, 40, 38, 36, 35, 34, 32, 31, 30, 30, 29,
+        28, 27, 27, 26, 25, 25, 24, 24, 23, 23, 23, 23, 49, 51, 52, 54, 56, 53,
+        50, 46, 43, 41, 39, 37, 35, 34, 32, 31, 30, 29, 28, 28, 27, 26, 26, 25,
+        25, 24, 24, 23, 23, 23, 23, 23, 45, 47, 49, 51, 52, 50, 47, 44, 42, 40,
+        38, 36, 34, 32, 31, 30, 29, 28, 27, 27, 26, 25, 25, 24, 24, 23, 23, 23,
+        22, 22, 22, 22, 43, 45, 46, 48, 50, 47, 45, 43, 40, 38, 36, 35, 33, 32,
+        30, 29, 28, 27, 27, 26, 25, 25, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22,
+        41, 42, 44, 45, 47, 45, 43, 41, 39, 37, 35, 33, 32, 31, 30, 28, 27, 27,
+        26, 25, 25, 24, 24, 23, 23, 22, 22, 22, 21, 21, 21, 21, 38, 40, 41, 43,
+        44, 42, 41, 39, 37, 35, 34, 32, 31, 30, 29, 28, 27, 26, 25, 25, 24, 24,
+        23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 36, 37, 39, 40, 41, 40, 38, 37,
+        36, 34, 33, 31, 30, 29, 28, 27, 26, 25, 25, 24, 23, 23, 23, 22, 22, 22,
+        21, 21, 21, 21, 21, 21, 34, 35, 37, 38, 40, 38, 37, 36, 34, 33, 32, 30,
+        29, 28, 27, 26, 25, 25, 24, 24, 23, 23, 22, 22, 21, 21, 21, 21, 20, 20,
+        20, 20, 33, 34, 35, 36, 38, 36, 35, 34, 33, 32, 31, 30, 28, 27, 27, 26,
+        25, 24, 24, 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 20, 31, 32,
+        33, 35, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 24, 23, 23,
+        22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 20, 20, 29, 30, 32, 33, 34, 33,
+        32, 31, 30, 30, 29, 28, 27, 26, 25, 25, 24, 23, 23, 22, 22, 21, 21, 21,
+        20, 20, 20, 20, 19, 19, 19, 19, 28, 29, 30, 31, 32, 32, 31, 30, 29, 29,
+        28, 27, 26, 25, 25, 24, 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19,
+        19, 19, 19, 19, 27, 28, 29, 30, 31, 30, 30, 29, 29, 28, 27, 26, 26, 25,
+        24, 24, 23, 23, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19,
+        26, 27, 28, 29, 30, 29, 29, 28, 28, 27, 26, 26, 25, 24, 24, 23, 23, 22,
+        22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 25, 26, 27, 28,
+        28, 28, 28, 27, 27, 26, 25, 25, 24, 24, 23, 23, 22, 22, 21, 21, 21, 20,
+        20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 25, 26, 27, 28, 28, 28, 28, 27,
+        27, 26, 25, 25, 24, 24, 23, 23, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19,
+        19, 19, 19, 19, 19, 19, 25, 26, 27, 28, 28, 28, 28, 27, 27, 26, 25, 25,
+        24, 24, 23, 23, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19,
+        19, 19, 25, 26, 27, 28, 28, 28, 28, 27, 27, 26, 25, 25, 24, 24, 23, 23,
+        22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19 },
+      { /* Intra matrices */
+        /* Size 4 */
+        171, 157, 95, 62, 157, 104, 75, 57, 95, 75, 54, 45, 62, 57, 45, 39,
+        /* Size 8 */
+        144, 192, 177, 135, 101, 78, 63, 53, 192, 167, 174, 149, 117, 91, 73,
+        61, 177, 174, 125, 107, 92, 77, 65, 56, 135, 149, 107, 84, 73, 64, 57,
+        51, 101, 117, 92, 73, 62, 55, 50, 46, 78, 91, 77, 64, 55, 49, 45, 42,
+        63, 73, 65, 57, 50, 45, 42, 39, 53, 61, 56, 51, 46, 42, 39, 38,
+        /* Size 16 */
+        149, 174, 199, 191, 184, 162, 140, 122, 104, 92, 81, 73, 65, 60, 55, 55,
+        174, 180, 186, 184, 182, 165, 147, 130, 113, 100, 88, 79, 70, 65, 59,
+        59, 199, 186, 173, 177, 181, 168, 154, 138, 121, 108, 95, 85, 76, 69,
+        63, 63, 191, 184, 177, 166, 155, 144, 133, 120, 108, 98, 87, 80, 72, 66,
+        61, 61, 184, 182, 181, 155, 129, 120, 111, 103, 95, 88, 80, 74, 68, 63,
+        58, 58, 162, 165, 168, 144, 120, 110, 99, 92, 85, 79, 73, 68, 63, 59,
+        55, 55, 140, 147, 154, 133, 111, 99, 88, 81, 75, 71, 66, 63, 59, 56, 53,
+        53, 122, 130, 138, 120, 103, 92, 81, 75, 70, 66, 62, 58, 55, 53, 50, 50,
+        104, 113, 121, 108, 95, 85, 75, 70, 64, 60, 57, 54, 52, 50, 48, 48, 92,
+        100, 108, 98, 88, 79, 71, 66, 60, 57, 54, 51, 49, 47, 46, 46, 81, 88,
+        95, 87, 80, 73, 66, 62, 57, 54, 51, 49, 47, 45, 44, 44, 73, 79, 85, 80,
+        74, 68, 63, 58, 54, 51, 49, 47, 45, 44, 42, 42, 65, 70, 76, 72, 68, 63,
+        59, 55, 52, 49, 47, 45, 43, 42, 41, 41, 60, 65, 69, 66, 63, 59, 56, 53,
+        50, 47, 45, 44, 42, 41, 40, 40, 55, 59, 63, 61, 58, 55, 53, 50, 48, 46,
+        44, 42, 41, 40, 39, 39, 55, 59, 63, 61, 58, 55, 53, 50, 48, 46, 44, 42,
+        41, 40, 39, 39,
+        /* Size 32 */
+        152, 165, 177, 190, 202, 198, 195, 191, 187, 176, 165, 153, 142, 133,
+        124, 115, 106, 100, 94, 88, 82, 78, 74, 70, 66, 64, 61, 58, 56, 56, 56,
+        56, 165, 173, 180, 188, 196, 193, 191, 189, 186, 176, 166, 156, 146,
+        137, 128, 119, 111, 104, 98, 92, 86, 82, 77, 73, 69, 66, 63, 61, 58, 58,
+        58, 58, 177, 180, 183, 186, 189, 188, 187, 187, 186, 177, 168, 159, 150,
+        141, 132, 124, 115, 108, 102, 96, 89, 85, 80, 76, 72, 69, 66, 63, 60,
+        60, 60, 60, 190, 188, 186, 185, 183, 183, 184, 184, 185, 177, 169, 161,
+        154, 145, 136, 128, 119, 112, 106, 99, 93, 88, 84, 79, 74, 71, 68, 65,
+        62, 62, 62, 62, 202, 196, 189, 183, 176, 178, 180, 182, 184, 177, 171,
+        164, 157, 149, 140, 132, 123, 117, 110, 103, 96, 92, 87, 82, 77, 74, 71,
+        67, 64, 64, 64, 64, 198, 193, 188, 183, 178, 177, 175, 173, 171, 165,
+        159, 152, 146, 139, 131, 124, 117, 111, 105, 99, 93, 88, 84, 80, 75, 72,
+        69, 66, 63, 63, 63, 63, 195, 191, 187, 184, 180, 175, 169, 164, 158,
+        152, 147, 141, 135, 129, 123, 116, 110, 105, 99, 94, 89, 85, 81, 77, 73,
+        70, 67, 65, 62, 62, 62, 62, 191, 189, 187, 184, 182, 173, 164, 154, 145,
+        140, 134, 129, 124, 119, 114, 109, 103, 99, 94, 90, 85, 82, 78, 75, 71,
+        68, 66, 63, 61, 61, 61, 61, 187, 186, 186, 185, 184, 171, 158, 145, 132,
+        127, 122, 118, 113, 109, 105, 101, 97, 93, 89, 85, 82, 78, 75, 72, 69,
+        67, 64, 62, 59, 59, 59, 59, 176, 176, 177, 177, 177, 165, 152, 140, 127,
+        122, 117, 112, 107, 103, 99, 96, 92, 88, 85, 81, 78, 75, 72, 70, 67, 64,
+        62, 60, 58, 58, 58, 58, 165, 166, 168, 169, 171, 159, 147, 134, 122,
+        117, 112, 106, 101, 97, 94, 90, 87, 84, 81, 78, 75, 72, 69, 67, 64, 62,
+        60, 58, 56, 56, 56, 56, 153, 156, 159, 161, 164, 152, 141, 129, 118,
+        112, 106, 101, 95, 92, 88, 85, 82, 79, 76, 74, 71, 69, 67, 64, 62, 60,
+        59, 57, 55, 55, 55, 55, 142, 146, 150, 154, 157, 146, 135, 124, 113,
+        107, 101, 95, 89, 86, 83, 80, 77, 74, 72, 70, 68, 66, 64, 62, 60, 58,
+        57, 55, 54, 54, 54, 54, 133, 137, 141, 145, 149, 139, 129, 119, 109,
+        103, 97, 92, 86, 83, 80, 77, 74, 72, 69, 67, 65, 63, 62, 60, 58, 57, 55,
+        54, 52, 52, 52, 52, 124, 128, 132, 136, 140, 131, 123, 114, 105, 99, 94,
+        88, 83, 80, 77, 74, 71, 69, 67, 65, 63, 61, 59, 58, 56, 55, 54, 52, 51,
+        51, 51, 51, 115, 119, 124, 128, 132, 124, 116, 109, 101, 96, 90, 85, 80,
+        77, 74, 71, 68, 66, 64, 62, 60, 59, 57, 56, 54, 53, 52, 51, 50, 50, 50,
+        50, 106, 111, 115, 119, 123, 117, 110, 103, 97, 92, 87, 82, 77, 74, 71,
+        68, 65, 63, 61, 60, 58, 57, 55, 54, 53, 52, 51, 50, 48, 48, 48, 48, 100,
+        104, 108, 112, 117, 111, 105, 99, 93, 88, 84, 79, 74, 72, 69, 66, 63,
+        61, 60, 58, 56, 55, 54, 53, 51, 50, 49, 48, 47, 47, 47, 47, 94, 98, 102,
+        106, 110, 105, 99, 94, 89, 85, 81, 76, 72, 69, 67, 64, 61, 60, 58, 56,
+        55, 54, 52, 51, 50, 49, 48, 47, 46, 46, 46, 46, 88, 92, 96, 99, 103, 99,
+        94, 90, 85, 81, 78, 74, 70, 67, 65, 62, 60, 58, 56, 55, 53, 52, 51, 50,
+        49, 48, 47, 46, 46, 46, 46, 46, 82, 86, 89, 93, 96, 93, 89, 85, 82, 78,
+        75, 71, 68, 65, 63, 60, 58, 56, 55, 53, 52, 51, 50, 49, 48, 47, 46, 45,
+        45, 45, 45, 45, 78, 82, 85, 88, 92, 88, 85, 82, 78, 75, 72, 69, 66, 63,
+        61, 59, 57, 55, 54, 52, 51, 50, 49, 48, 47, 46, 45, 45, 44, 44, 44, 44,
+        74, 77, 80, 84, 87, 84, 81, 78, 75, 72, 69, 67, 64, 62, 59, 57, 55, 54,
+        52, 51, 50, 49, 48, 47, 46, 45, 44, 44, 43, 43, 43, 43, 70, 73, 76, 79,
+        82, 80, 77, 75, 72, 70, 67, 64, 62, 60, 58, 56, 54, 53, 51, 50, 49, 48,
+        47, 46, 45, 44, 44, 43, 42, 42, 42, 42, 66, 69, 72, 74, 77, 75, 73, 71,
+        69, 67, 64, 62, 60, 58, 56, 54, 53, 51, 50, 49, 48, 47, 46, 45, 44, 44,
+        43, 42, 42, 42, 42, 42, 64, 66, 69, 71, 74, 72, 70, 68, 67, 64, 62, 60,
+        58, 57, 55, 53, 52, 50, 49, 48, 47, 46, 45, 44, 44, 43, 42, 42, 41, 41,
+        41, 41, 61, 63, 66, 68, 71, 69, 67, 66, 64, 62, 60, 59, 57, 55, 54, 52,
+        51, 49, 48, 47, 46, 45, 44, 44, 43, 42, 42, 41, 41, 41, 41, 41, 58, 61,
+        63, 65, 67, 66, 65, 63, 62, 60, 58, 57, 55, 54, 52, 51, 50, 48, 47, 46,
+        45, 45, 44, 43, 42, 42, 41, 41, 40, 40, 40, 40, 56, 58, 60, 62, 64, 63,
+        62, 61, 59, 58, 56, 55, 54, 52, 51, 50, 48, 47, 46, 46, 45, 44, 43, 42,
+        42, 41, 41, 40, 40, 40, 40, 40, 56, 58, 60, 62, 64, 63, 62, 61, 59, 58,
+        56, 55, 54, 52, 51, 50, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 41, 40,
+        40, 40, 40, 40, 56, 58, 60, 62, 64, 63, 62, 61, 59, 58, 56, 55, 54, 52,
+        51, 50, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 41, 40, 40, 40, 40, 40,
+        56, 58, 60, 62, 64, 63, 62, 61, 59, 58, 56, 55, 54, 52, 51, 50, 48, 47,
+        46, 46, 45, 44, 43, 42, 42, 41, 41, 40, 40, 40, 40, 40 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 41, 38, 31, 41, 35, 33, 29, 38, 33, 26, 23, 31, 29, 23, 20,
+        /* Size 8 */
+        64, 79, 45, 42, 40, 36, 32, 29, 79, 55, 44, 48, 47, 43, 38, 34, 45, 44,
+        38, 39, 39, 37, 35, 32, 42, 48, 39, 35, 33, 32, 31, 29, 40, 47, 39, 33,
+        30, 28, 27, 26, 36, 43, 37, 32, 28, 26, 25, 23, 32, 38, 35, 31, 27, 25,
+        23, 22, 29, 34, 32, 29, 26, 23, 22, 21,
+        /* Size 16 */
+        64, 71, 79, 62, 45, 43, 42, 41, 40, 38, 36, 34, 32, 30, 29, 29, 71, 69,
+        67, 56, 44, 45, 45, 44, 43, 41, 39, 37, 35, 33, 31, 31, 79, 67, 55, 50,
+        44, 46, 48, 47, 47, 45, 43, 41, 38, 36, 34, 34, 62, 56, 50, 45, 41, 42,
+        44, 43, 43, 42, 40, 38, 37, 35, 33, 33, 45, 44, 44, 41, 38, 39, 39, 39,
+        39, 38, 37, 36, 35, 33, 32, 32, 43, 45, 46, 42, 39, 38, 37, 37, 36, 36,
+        35, 34, 33, 31, 30, 30, 42, 45, 48, 44, 39, 37, 35, 34, 33, 33, 32, 31,
+        31, 30, 29, 29, 41, 44, 47, 43, 39, 37, 34, 33, 32, 31, 30, 30, 29, 28,
+        27, 27, 40, 43, 47, 43, 39, 36, 33, 32, 30, 29, 28, 28, 27, 26, 26, 26,
+        38, 41, 45, 42, 38, 36, 33, 31, 29, 28, 27, 27, 26, 25, 25, 25, 36, 39,
+        43, 40, 37, 35, 32, 30, 28, 27, 26, 25, 25, 24, 23, 23, 34, 37, 41, 38,
+        36, 34, 31, 30, 28, 27, 25, 25, 24, 23, 23, 23, 32, 35, 38, 37, 35, 33,
+        31, 29, 27, 26, 25, 24, 23, 22, 22, 22, 30, 33, 36, 35, 33, 31, 30, 28,
+        26, 25, 24, 23, 22, 22, 21, 21, 29, 31, 34, 33, 32, 30, 29, 27, 26, 25,
+        23, 23, 22, 21, 21, 21, 29, 31, 34, 33, 32, 30, 29, 27, 26, 25, 23, 23,
+        22, 21, 21, 21,
+        /* Size 32 */
+        64, 68, 71, 75, 79, 70, 62, 53, 45, 44, 43, 43, 42, 42, 41, 40, 40, 39,
+        38, 37, 36, 35, 34, 33, 32, 31, 30, 30, 29, 29, 29, 29, 68, 69, 70, 72,
+        73, 66, 59, 52, 44, 44, 44, 44, 44, 43, 43, 42, 42, 41, 40, 39, 38, 37,
+        36, 35, 34, 33, 32, 31, 30, 30, 30, 30, 71, 70, 69, 68, 67, 61, 56, 50,
+        44, 44, 45, 45, 45, 45, 44, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34,
+        33, 32, 31, 31, 31, 31, 75, 72, 68, 65, 61, 57, 53, 48, 44, 45, 45, 46,
+        47, 46, 46, 45, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 33,
+        33, 33, 79, 73, 67, 61, 55, 52, 50, 47, 44, 45, 46, 47, 48, 48, 47, 47,
+        47, 46, 45, 44, 43, 42, 41, 40, 38, 37, 36, 35, 34, 34, 34, 34, 70, 66,
+        61, 57, 52, 50, 47, 45, 42, 43, 44, 45, 46, 46, 45, 45, 45, 44, 43, 42,
+        42, 41, 39, 38, 37, 36, 36, 35, 34, 34, 34, 34, 62, 59, 56, 53, 50, 47,
+        45, 43, 41, 42, 42, 43, 44, 43, 43, 43, 43, 42, 42, 41, 40, 39, 38, 37,
+        37, 36, 35, 34, 33, 33, 33, 33, 53, 52, 50, 48, 47, 45, 43, 41, 39, 40,
+        40, 41, 41, 41, 41, 41, 41, 41, 40, 39, 39, 38, 37, 36, 36, 35, 34, 33,
+        32, 32, 32, 32, 45, 44, 44, 44, 44, 42, 41, 39, 38, 38, 39, 39, 39, 39,
+        39, 39, 39, 39, 38, 38, 37, 37, 36, 35, 35, 34, 33, 32, 32, 32, 32, 32,
+        44, 44, 44, 45, 45, 43, 42, 40, 38, 38, 38, 38, 38, 38, 38, 38, 38, 37,
+        37, 37, 36, 36, 35, 34, 34, 33, 32, 32, 31, 31, 31, 31, 43, 44, 45, 45,
+        46, 44, 42, 40, 39, 38, 38, 37, 37, 37, 37, 37, 36, 36, 36, 35, 35, 34,
+        34, 33, 33, 32, 31, 31, 30, 30, 30, 30, 43, 44, 45, 46, 47, 45, 43, 41,
+        39, 38, 37, 37, 36, 36, 35, 35, 35, 35, 34, 34, 34, 33, 33, 32, 32, 31,
+        30, 30, 29, 29, 29, 29, 42, 44, 45, 47, 48, 46, 44, 41, 39, 38, 37, 36,
+        35, 35, 34, 34, 33, 33, 33, 33, 32, 32, 31, 31, 31, 30, 30, 29, 29, 29,
+        29, 29, 42, 43, 45, 46, 48, 46, 43, 41, 39, 38, 37, 36, 35, 34, 34, 33,
+        33, 32, 32, 32, 31, 31, 30, 30, 30, 29, 29, 28, 28, 28, 28, 28, 41, 43,
+        44, 46, 47, 45, 43, 41, 39, 38, 37, 35, 34, 34, 33, 32, 32, 31, 31, 31,
+        30, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 27, 40, 42, 44, 45, 47, 45,
+        43, 41, 39, 38, 37, 35, 34, 33, 32, 32, 31, 31, 30, 30, 29, 29, 29, 28,
+        28, 28, 27, 27, 26, 26, 26, 26, 40, 42, 43, 45, 47, 45, 43, 41, 39, 38,
+        36, 35, 33, 33, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 26, 26,
+        26, 26, 26, 26, 39, 41, 42, 44, 46, 44, 42, 41, 39, 37, 36, 35, 33, 32,
+        31, 31, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25,
+        38, 40, 41, 43, 45, 43, 42, 40, 38, 37, 36, 34, 33, 32, 31, 30, 29, 29,
+        28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 25, 37, 39, 40, 42,
+        44, 42, 41, 39, 38, 37, 35, 34, 33, 32, 31, 30, 29, 28, 28, 27, 27, 26,
+        26, 26, 25, 25, 25, 24, 24, 24, 24, 24, 36, 38, 39, 41, 43, 42, 40, 39,
+        37, 36, 35, 34, 32, 31, 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 25, 24,
+        24, 24, 23, 23, 23, 23, 35, 37, 38, 40, 42, 41, 39, 38, 37, 36, 34, 33,
+        32, 31, 30, 29, 28, 28, 27, 26, 26, 25, 25, 25, 24, 24, 24, 23, 23, 23,
+        23, 23, 34, 36, 37, 39, 41, 39, 38, 37, 36, 35, 34, 33, 31, 30, 30, 29,
+        28, 27, 27, 26, 25, 25, 25, 24, 24, 24, 23, 23, 23, 23, 23, 23, 33, 35,
+        36, 38, 40, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 27, 26, 26,
+        25, 25, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 32, 34, 35, 37, 38, 37,
+        37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 26, 25, 25, 24, 24, 23,
+        23, 23, 22, 22, 22, 22, 22, 22, 31, 33, 34, 36, 37, 36, 36, 35, 34, 33,
+        32, 31, 30, 29, 28, 28, 27, 26, 26, 25, 24, 24, 24, 23, 23, 22, 22, 22,
+        22, 22, 22, 22, 30, 32, 33, 35, 36, 36, 35, 34, 33, 32, 31, 30, 30, 29,
+        28, 27, 26, 26, 25, 25, 24, 24, 23, 23, 22, 22, 22, 22, 21, 21, 21, 21,
+        30, 31, 32, 34, 35, 35, 34, 33, 32, 32, 31, 30, 29, 28, 28, 27, 26, 25,
+        25, 24, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 29, 30, 31, 33,
+        34, 34, 33, 32, 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 25, 24, 23, 23,
+        23, 22, 22, 22, 21, 21, 21, 21, 21, 21, 29, 30, 31, 33, 34, 34, 33, 32,
+        32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 25, 24, 23, 23, 23, 22, 22, 22,
+        21, 21, 21, 21, 21, 21, 29, 30, 31, 33, 34, 34, 33, 32, 32, 31, 30, 29,
+        29, 28, 27, 26, 26, 25, 25, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 21,
+        21, 21, 29, 30, 31, 33, 34, 34, 33, 32, 32, 31, 30, 29, 29, 28, 27, 26,
+        26, 25, 25, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 21 },
+      { /* Intra matrices */
+        /* Size 4 */
+        142, 90, 83, 66, 90, 75, 71, 62, 83, 71, 55, 49, 66, 62, 49, 41,
+        /* Size 8 */
+        132, 163, 91, 85, 80, 72, 64, 57, 163, 113, 89, 98, 95, 87, 77, 68, 91,
+        89, 76, 79, 79, 75, 69, 63, 85, 98, 79, 70, 67, 64, 60, 56, 80, 95, 79,
+        67, 60, 56, 53, 50, 72, 87, 75, 64, 56, 51, 48, 45, 64, 77, 69, 60, 53,
+        48, 44, 42, 57, 68, 63, 56, 50, 45, 42, 39,
+        /* Size 16 */
+        136, 152, 168, 130, 93, 90, 88, 85, 82, 78, 74, 70, 65, 62, 58, 58, 152,
+        147, 142, 117, 92, 93, 94, 92, 90, 86, 81, 77, 72, 68, 64, 64, 168, 142,
+        116, 104, 91, 96, 101, 99, 98, 93, 89, 84, 79, 75, 70, 70, 130, 117,
+        104, 94, 85, 88, 91, 90, 90, 86, 83, 79, 75, 71, 67, 67, 93, 92, 91, 85,
+        78, 80, 81, 81, 81, 79, 77, 74, 71, 68, 65, 65, 90, 93, 96, 88, 80, 78,
+        76, 76, 75, 73, 71, 69, 67, 64, 61, 61, 88, 94, 101, 91, 81, 76, 72, 70,
+        68, 67, 66, 64, 62, 60, 58, 58, 85, 92, 99, 90, 81, 76, 70, 68, 65, 63,
+        62, 60, 58, 56, 55, 55, 82, 90, 98, 90, 81, 75, 68, 65, 61, 60, 58, 56,
+        54, 53, 51, 51, 78, 86, 93, 86, 79, 73, 67, 63, 60, 57, 55, 53, 52, 50,
+        49, 49, 74, 81, 89, 83, 77, 71, 66, 62, 58, 55, 52, 51, 49, 48, 47, 47,
+        70, 77, 84, 79, 74, 69, 64, 60, 56, 53, 51, 49, 47, 46, 45, 45, 65, 72,
+        79, 75, 71, 67, 62, 58, 54, 52, 49, 47, 46, 44, 43, 43, 62, 68, 75, 71,
+        68, 64, 60, 56, 53, 50, 48, 46, 44, 43, 42, 42, 58, 64, 70, 67, 65, 61,
+        58, 55, 51, 49, 47, 45, 43, 42, 40, 40, 58, 64, 70, 67, 65, 61, 58, 55,
+        51, 49, 47, 45, 43, 42, 40, 40,
+        /* Size 32 */
+        137, 146, 154, 162, 170, 151, 132, 113, 94, 93, 92, 90, 89, 88, 86, 85,
+        83, 81, 79, 77, 75, 73, 71, 68, 66, 64, 63, 61, 59, 59, 59, 59, 146,
+        148, 151, 154, 157, 141, 125, 110, 94, 93, 93, 93, 92, 91, 90, 89, 87,
+        85, 83, 81, 79, 76, 74, 72, 70, 68, 66, 64, 62, 62, 62, 62, 154, 151,
+        149, 146, 144, 131, 119, 106, 93, 94, 94, 95, 96, 94, 93, 92, 91, 89,
+        87, 85, 83, 80, 78, 76, 73, 71, 69, 67, 65, 65, 65, 65, 162, 154, 146,
+        139, 131, 122, 112, 103, 93, 94, 96, 97, 99, 98, 97, 96, 95, 93, 91, 89,
+        86, 84, 82, 79, 77, 75, 72, 70, 68, 68, 68, 68, 170, 157, 144, 131, 118,
+        112, 105, 99, 93, 95, 97, 100, 102, 101, 101, 100, 99, 97, 95, 93, 90,
+        88, 85, 83, 80, 78, 76, 73, 71, 71, 71, 71, 151, 141, 131, 122, 112,
+        106, 100, 95, 89, 91, 93, 95, 97, 97, 96, 95, 95, 93, 91, 89, 87, 85,
+        83, 81, 78, 76, 74, 72, 70, 70, 70, 70, 132, 125, 119, 112, 105, 100,
+        96, 91, 86, 87, 89, 91, 92, 92, 91, 91, 91, 89, 88, 86, 84, 82, 80, 78,
+        76, 74, 72, 70, 68, 68, 68, 68, 113, 110, 106, 103, 99, 95, 91, 87, 83,
+        84, 85, 86, 87, 87, 87, 87, 87, 85, 84, 83, 81, 80, 78, 76, 74, 72, 70,
+        69, 67, 67, 67, 67, 94, 94, 93, 93, 93, 89, 86, 83, 79, 80, 81, 81, 82,
+        82, 82, 82, 82, 81, 80, 79, 78, 77, 75, 74, 72, 70, 69, 67, 65, 65, 65,
+        65, 93, 93, 94, 94, 95, 91, 87, 84, 80, 80, 80, 80, 80, 80, 79, 79, 79,
+        78, 77, 76, 75, 74, 73, 71, 70, 68, 67, 65, 64, 64, 64, 64, 92, 93, 94,
+        96, 97, 93, 89, 85, 81, 80, 79, 78, 77, 77, 77, 76, 76, 75, 74, 73, 72,
+        71, 70, 69, 67, 66, 65, 63, 62, 62, 62, 62, 90, 93, 95, 97, 100, 95, 91,
+        86, 81, 80, 78, 77, 75, 74, 74, 73, 73, 72, 71, 70, 69, 68, 67, 66, 65,
+        64, 63, 61, 60, 60, 60, 60, 89, 92, 96, 99, 102, 97, 92, 87, 82, 80, 77,
+        75, 73, 72, 71, 70, 69, 69, 68, 67, 67, 66, 65, 64, 63, 62, 61, 60, 59,
+        59, 59, 59, 88, 91, 94, 98, 101, 97, 92, 87, 82, 80, 77, 74, 72, 71, 70,
+        69, 68, 67, 66, 65, 64, 64, 63, 62, 61, 60, 59, 58, 57, 57, 57, 57, 86,
+        90, 93, 97, 101, 96, 91, 87, 82, 79, 77, 74, 71, 70, 68, 67, 66, 65, 64,
+        63, 62, 62, 61, 60, 59, 58, 57, 56, 55, 55, 55, 55, 85, 89, 92, 96, 100,
+        95, 91, 87, 82, 79, 76, 73, 70, 69, 67, 66, 64, 63, 62, 61, 60, 60, 59,
+        58, 57, 56, 55, 55, 54, 54, 54, 54, 83, 87, 91, 95, 99, 95, 91, 87, 82,
+        79, 76, 73, 69, 68, 66, 64, 62, 61, 60, 59, 58, 58, 57, 56, 55, 54, 54,
+        53, 52, 52, 52, 52, 81, 85, 89, 93, 97, 93, 89, 85, 81, 78, 75, 72, 69,
+        67, 65, 63, 61, 60, 59, 58, 57, 56, 55, 55, 54, 53, 52, 52, 51, 51, 51,
+        51, 79, 83, 87, 91, 95, 91, 88, 84, 80, 77, 74, 71, 68, 66, 64, 62, 60,
+        59, 58, 57, 56, 55, 54, 53, 53, 52, 51, 50, 50, 50, 50, 50, 77, 81, 85,
+        89, 93, 89, 86, 83, 79, 76, 73, 70, 67, 65, 63, 61, 59, 58, 57, 56, 54,
+        54, 53, 52, 51, 50, 50, 49, 48, 48, 48, 48, 75, 79, 83, 86, 90, 87, 84,
+        81, 78, 75, 72, 69, 67, 64, 62, 60, 58, 57, 56, 54, 53, 52, 52, 51, 50,
+        49, 48, 48, 47, 47, 47, 47, 73, 76, 80, 84, 88, 85, 82, 80, 77, 74, 71,
+        68, 66, 64, 62, 60, 58, 56, 55, 54, 52, 52, 51, 50, 49, 48, 48, 47, 46,
+        46, 46, 46, 71, 74, 78, 82, 85, 83, 80, 78, 75, 73, 70, 67, 65, 63, 61,
+        59, 57, 55, 54, 53, 52, 51, 50, 49, 48, 47, 47, 46, 45, 45, 45, 45, 68,
+        72, 76, 79, 83, 81, 78, 76, 74, 71, 69, 66, 64, 62, 60, 58, 56, 55, 53,
+        52, 51, 50, 49, 48, 47, 46, 46, 45, 44, 44, 44, 44, 66, 70, 73, 77, 80,
+        78, 76, 74, 72, 70, 67, 65, 63, 61, 59, 57, 55, 54, 53, 51, 50, 49, 48,
+        47, 46, 45, 45, 44, 44, 44, 44, 44, 64, 68, 71, 75, 78, 76, 74, 72, 70,
+        68, 66, 64, 62, 60, 58, 56, 54, 53, 52, 50, 49, 48, 47, 46, 45, 45, 44,
+        44, 43, 43, 43, 43, 63, 66, 69, 72, 76, 74, 72, 70, 69, 67, 65, 63, 61,
+        59, 57, 55, 54, 52, 51, 50, 48, 48, 47, 46, 45, 44, 44, 43, 42, 42, 42,
+        42, 61, 64, 67, 70, 73, 72, 70, 69, 67, 65, 63, 61, 60, 58, 56, 55, 53,
+        52, 50, 49, 48, 47, 46, 45, 44, 44, 43, 42, 42, 42, 42, 42, 59, 62, 65,
+        68, 71, 70, 68, 67, 65, 64, 62, 60, 59, 57, 55, 54, 52, 51, 50, 48, 47,
+        46, 45, 44, 44, 43, 42, 42, 41, 41, 41, 41, 59, 62, 65, 68, 71, 70, 68,
+        67, 65, 64, 62, 60, 59, 57, 55, 54, 52, 51, 50, 48, 47, 46, 45, 44, 44,
+        43, 42, 42, 41, 41, 41, 41, 59, 62, 65, 68, 71, 70, 68, 67, 65, 64, 62,
+        60, 59, 57, 55, 54, 52, 51, 50, 48, 47, 46, 45, 44, 44, 43, 42, 42, 41,
+        41, 41, 41, 59, 62, 65, 68, 71, 70, 68, 67, 65, 64, 62, 60, 59, 57, 55,
+        54, 52, 51, 50, 48, 47, 46, 45, 44, 44, 43, 42, 42, 41, 41, 41,
+        41 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 59, 38, 26, 59, 41, 31, 25, 38, 31, 24, 21, 26, 25, 21, 19,
+        /* Size 8 */
+        64, 83, 77, 60, 47, 37, 31, 28, 83, 73, 76, 66, 53, 43, 36, 31, 77, 76,
+        56, 49, 43, 37, 32, 29, 60, 66, 49, 40, 35, 32, 29, 27, 47, 53, 43, 35,
+        31, 28, 26, 25, 37, 43, 37, 32, 28, 26, 24, 23, 31, 36, 32, 29, 26, 24,
+        23, 22, 28, 31, 29, 27, 25, 23, 22, 21,
+        /* Size 16 */
+        64, 73, 83, 80, 77, 69, 60, 53, 47, 42, 37, 34, 31, 30, 28, 28, 73, 76,
+        78, 77, 77, 70, 63, 56, 50, 45, 40, 37, 34, 31, 29, 29, 83, 78, 73, 75,
+        76, 71, 66, 59, 53, 48, 43, 39, 36, 33, 31, 31, 80, 77, 75, 70, 66, 62,
+        58, 53, 48, 44, 40, 37, 34, 32, 30, 30, 77, 77, 76, 66, 56, 53, 49, 46,
+        43, 40, 37, 35, 32, 31, 29, 29, 69, 70, 71, 62, 53, 49, 45, 42, 39, 37,
+        35, 33, 31, 29, 28, 28, 60, 63, 66, 58, 49, 45, 40, 38, 35, 34, 32, 31,
+        29, 28, 27, 27, 53, 56, 59, 53, 46, 42, 38, 35, 33, 32, 30, 29, 28, 27,
+        26, 26, 47, 50, 53, 48, 43, 39, 35, 33, 31, 30, 28, 27, 26, 26, 25, 25,
+        42, 45, 48, 44, 40, 37, 34, 32, 30, 28, 27, 26, 25, 25, 24, 24, 37, 40,
+        43, 40, 37, 35, 32, 30, 28, 27, 26, 25, 24, 24, 23, 23, 34, 37, 39, 37,
+        35, 33, 31, 29, 27, 26, 25, 24, 24, 23, 23, 23, 31, 34, 36, 34, 32, 31,
+        29, 28, 26, 25, 24, 24, 23, 23, 22, 22, 30, 31, 33, 32, 31, 29, 28, 27,
+        26, 25, 24, 23, 23, 22, 22, 22, 28, 29, 31, 30, 29, 28, 27, 26, 25, 24,
+        23, 23, 22, 22, 21, 21, 28, 29, 31, 30, 29, 28, 27, 26, 25, 24, 23, 23,
+        22, 22, 21, 21,
+        /* Size 32 */
+        64, 69, 73, 78, 83, 81, 80, 79, 77, 73, 69, 64, 60, 57, 53, 50, 47, 44,
+        42, 40, 37, 36, 34, 33, 31, 30, 30, 29, 28, 28, 28, 28, 69, 72, 75, 78,
+        80, 80, 79, 78, 77, 73, 69, 65, 62, 58, 55, 52, 48, 46, 44, 41, 39, 37,
+        36, 34, 33, 31, 30, 29, 28, 28, 28, 28, 73, 75, 76, 77, 78, 78, 77, 77,
+        77, 73, 70, 66, 63, 60, 56, 53, 50, 47, 45, 43, 40, 38, 37, 35, 34, 32,
+        31, 30, 29, 29, 29, 29, 78, 78, 77, 76, 76, 76, 76, 76, 76, 73, 70, 67,
+        64, 61, 58, 55, 51, 49, 46, 44, 41, 40, 38, 36, 35, 33, 32, 31, 30, 30,
+        30, 30, 83, 80, 78, 76, 73, 74, 75, 75, 76, 74, 71, 68, 66, 63, 59, 56,
+        53, 50, 48, 45, 43, 41, 39, 37, 36, 34, 33, 32, 31, 31, 31, 31, 81, 80,
+        78, 76, 74, 73, 72, 72, 71, 69, 66, 64, 62, 59, 56, 53, 51, 48, 46, 44,
+        41, 40, 38, 36, 35, 34, 33, 31, 30, 30, 30, 30, 80, 79, 77, 76, 75, 72,
+        70, 68, 66, 64, 62, 60, 58, 55, 53, 50, 48, 46, 44, 42, 40, 39, 37, 36,
+        34, 33, 32, 31, 30, 30, 30, 30, 79, 78, 77, 76, 75, 72, 68, 65, 61, 59,
+        57, 55, 53, 51, 49, 47, 46, 44, 42, 40, 39, 37, 36, 35, 33, 32, 31, 30,
+        29, 29, 29, 29, 77, 77, 77, 76, 76, 71, 66, 61, 56, 54, 53, 51, 49, 48,
+        46, 45, 43, 42, 40, 39, 37, 36, 35, 34, 32, 32, 31, 30, 29, 29, 29, 29,
+        73, 73, 73, 73, 74, 69, 64, 59, 54, 53, 51, 49, 47, 45, 44, 43, 41, 40,
+        39, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 28, 28, 28, 69, 69, 70, 70,
+        71, 66, 62, 57, 53, 51, 49, 47, 45, 43, 42, 41, 39, 38, 37, 36, 35, 34,
+        33, 32, 31, 30, 29, 29, 28, 28, 28, 28, 64, 65, 66, 67, 68, 64, 60, 55,
+        51, 49, 47, 45, 42, 41, 40, 39, 37, 36, 35, 34, 33, 32, 32, 31, 30, 29,
+        29, 28, 27, 27, 27, 27, 60, 62, 63, 64, 66, 62, 58, 53, 49, 47, 45, 42,
+        40, 39, 38, 37, 35, 35, 34, 33, 32, 31, 31, 30, 29, 28, 28, 27, 27, 27,
+        27, 27, 57, 58, 60, 61, 63, 59, 55, 51, 48, 45, 43, 41, 39, 38, 37, 35,
+        34, 33, 33, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 26, 26, 26, 53, 55,
+        56, 58, 59, 56, 53, 49, 46, 44, 42, 40, 38, 37, 35, 34, 33, 32, 32, 31,
+        30, 30, 29, 28, 28, 27, 27, 26, 26, 26, 26, 26, 50, 52, 53, 55, 56, 53,
+        50, 47, 45, 43, 41, 39, 37, 35, 34, 33, 32, 31, 31, 30, 29, 29, 28, 28,
+        27, 27, 26, 26, 25, 25, 25, 25, 47, 48, 50, 51, 53, 51, 48, 46, 43, 41,
+        39, 37, 35, 34, 33, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 26, 26, 25,
+        25, 25, 25, 25, 44, 46, 47, 49, 50, 48, 46, 44, 42, 40, 38, 36, 35, 33,
+        32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 25, 24, 24, 24, 24,
+        42, 44, 45, 46, 48, 46, 44, 42, 40, 39, 37, 35, 34, 33, 32, 31, 30, 29,
+        28, 28, 27, 27, 26, 26, 25, 25, 25, 24, 24, 24, 24, 24, 40, 41, 43, 44,
+        45, 44, 42, 40, 39, 37, 36, 34, 33, 32, 31, 30, 29, 28, 28, 27, 27, 26,
+        26, 25, 25, 25, 24, 24, 24, 24, 24, 24, 37, 39, 40, 41, 43, 41, 40, 39,
+        37, 36, 35, 33, 32, 31, 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24,
+        24, 24, 23, 23, 23, 23, 36, 37, 38, 40, 41, 40, 39, 37, 36, 35, 34, 32,
+        31, 30, 30, 29, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 24, 23, 23, 23,
+        23, 23, 34, 36, 37, 38, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28,
+        27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 23, 23, 23, 23, 33, 34,
+        35, 36, 37, 36, 36, 35, 34, 33, 32, 31, 30, 29, 28, 28, 27, 26, 26, 25,
+        25, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 31, 33, 34, 35, 36, 35,
+        34, 33, 32, 32, 31, 30, 29, 28, 28, 27, 26, 26, 25, 25, 24, 24, 24, 23,
+        23, 23, 23, 22, 22, 22, 22, 22, 30, 31, 32, 33, 34, 34, 33, 32, 32, 31,
+        30, 29, 28, 28, 27, 27, 26, 25, 25, 25, 24, 24, 23, 23, 23, 23, 22, 22,
+        22, 22, 22, 22, 30, 30, 31, 32, 33, 33, 32, 31, 31, 30, 29, 29, 28, 27,
+        27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22,
+        29, 29, 30, 31, 32, 31, 31, 30, 30, 29, 29, 28, 27, 27, 26, 26, 25, 25,
+        24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 28, 28, 29, 30,
+        31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23,
+        23, 22, 22, 22, 22, 22, 21, 21, 21, 21, 28, 28, 29, 30, 31, 30, 30, 29,
+        29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 22, 22, 22,
+        22, 22, 21, 21, 21, 21, 28, 28, 29, 30, 31, 30, 30, 29, 29, 28, 28, 27,
+        27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21,
+        21, 21, 28, 28, 29, 30, 31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 25,
+        25, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 21 },
+      { /* Intra matrices */
+        /* Size 4 */
+        155, 142, 90, 61, 142, 97, 72, 56, 90, 72, 54, 46, 61, 56, 46, 41,
+        /* Size 8 */
+        131, 172, 159, 123, 94, 74, 62, 53, 172, 151, 157, 135, 108, 86, 70, 60,
+        159, 157, 115, 99, 86, 74, 64, 56, 123, 135, 99, 80, 70, 63, 56, 51, 94,
+        108, 86, 70, 61, 55, 50, 47, 74, 86, 74, 63, 55, 50, 46, 44, 62, 70, 64,
+        56, 50, 46, 44, 42, 53, 60, 56, 51, 47, 44, 42, 40,
+        /* Size 16 */
+        136, 157, 177, 171, 165, 146, 127, 112, 97, 87, 77, 70, 64, 59, 55, 55,
+        157, 162, 167, 165, 163, 148, 133, 119, 104, 93, 83, 75, 68, 63, 58, 58,
+        177, 167, 156, 159, 162, 151, 140, 126, 111, 100, 89, 81, 73, 67, 62,
+        62, 171, 165, 159, 150, 140, 131, 121, 111, 100, 91, 83, 76, 69, 65, 60,
+        60, 165, 163, 162, 140, 118, 110, 103, 96, 89, 83, 76, 71, 66, 62, 58,
+        58, 146, 148, 151, 131, 110, 102, 93, 87, 81, 76, 71, 66, 62, 59, 55,
+        55, 127, 133, 140, 121, 103, 93, 83, 78, 72, 68, 65, 61, 58, 56, 53, 53,
+        112, 119, 126, 111, 96, 87, 78, 72, 67, 64, 61, 58, 55, 53, 51, 51, 97,
+        104, 111, 100, 89, 81, 72, 67, 63, 60, 57, 54, 52, 50, 49, 49, 87, 93,
+        100, 91, 83, 76, 68, 64, 60, 57, 54, 52, 50, 49, 47, 47, 77, 83, 89, 83,
+        76, 71, 65, 61, 57, 54, 51, 50, 48, 47, 45, 45, 70, 75, 81, 76, 71, 66,
+        61, 58, 54, 52, 50, 48, 46, 45, 44, 44, 64, 68, 73, 69, 66, 62, 58, 55,
+        52, 50, 48, 46, 45, 44, 43, 43, 59, 63, 67, 65, 62, 59, 56, 53, 50, 49,
+        47, 45, 44, 43, 42, 42, 55, 58, 62, 60, 58, 55, 53, 51, 49, 47, 45, 44,
+        43, 42, 41, 41, 55, 58, 62, 60, 58, 55, 53, 51, 49, 47, 45, 44, 43, 42,
+        41, 41,
+        /* Size 32 */
+        138, 148, 159, 170, 180, 177, 174, 171, 167, 158, 148, 139, 129, 122,
+        114, 106, 99, 94, 88, 83, 78, 75, 71, 68, 65, 62, 60, 58, 56, 56, 56,
+        56, 148, 155, 162, 168, 175, 173, 171, 169, 167, 158, 150, 141, 132,
+        125, 117, 110, 102, 97, 92, 86, 81, 78, 74, 71, 67, 65, 62, 60, 58, 58,
+        58, 58, 159, 162, 164, 167, 169, 169, 168, 167, 166, 159, 151, 143, 136,
+        128, 121, 113, 106, 100, 95, 90, 84, 80, 77, 73, 69, 67, 64, 62, 59, 59,
+        59, 59, 170, 168, 167, 165, 164, 164, 165, 165, 166, 159, 152, 146, 139,
+        132, 124, 117, 110, 104, 98, 93, 87, 83, 79, 76, 72, 69, 66, 64, 61, 61,
+        61, 61, 180, 175, 169, 164, 158, 160, 162, 163, 165, 159, 154, 148, 142,
+        135, 128, 120, 113, 107, 102, 96, 90, 86, 82, 78, 74, 71, 68, 66, 63,
+        63, 63, 63, 177, 173, 169, 164, 160, 159, 157, 155, 154, 149, 143, 138,
+        133, 126, 120, 114, 108, 102, 97, 92, 87, 83, 80, 76, 72, 70, 67, 64,
+        62, 62, 62, 62, 174, 171, 168, 165, 162, 157, 152, 147, 143, 138, 133,
+        128, 123, 118, 113, 107, 102, 97, 93, 88, 84, 81, 77, 74, 70, 68, 66,
+        63, 61, 61, 61, 61, 171, 169, 167, 165, 163, 155, 147, 139, 132, 127,
+        123, 118, 114, 109, 105, 101, 96, 92, 89, 85, 81, 78, 75, 72, 69, 66,
+        64, 62, 60, 60, 60, 60, 167, 167, 166, 166, 165, 154, 143, 132, 120,
+        116, 112, 108, 104, 101, 98, 94, 91, 87, 84, 81, 78, 75, 72, 70, 67, 65,
+        63, 61, 59, 59, 59, 59, 158, 158, 159, 159, 159, 149, 138, 127, 116,
+        112, 108, 104, 99, 96, 93, 90, 86, 83, 81, 78, 75, 72, 70, 67, 65, 63,
+        61, 59, 57, 57, 57, 57, 148, 150, 151, 152, 154, 143, 133, 123, 112,
+        108, 103, 99, 94, 91, 88, 85, 82, 79, 77, 74, 72, 70, 67, 65, 63, 61,
+        60, 58, 56, 56, 56, 56, 139, 141, 143, 146, 148, 138, 128, 118, 108,
+        104, 99, 94, 89, 86, 83, 81, 78, 76, 73, 71, 69, 67, 65, 63, 61, 60, 58,
+        57, 55, 55, 55, 55, 129, 132, 136, 139, 142, 133, 123, 114, 104, 99, 94,
+        89, 84, 81, 79, 76, 74, 72, 70, 68, 66, 64, 62, 61, 59, 58, 57, 55, 54,
+        54, 54, 54, 122, 125, 128, 132, 135, 126, 118, 109, 101, 96, 91, 86, 81,
+        79, 76, 74, 71, 69, 67, 66, 64, 62, 61, 59, 58, 56, 55, 54, 53, 53, 53,
+        53, 114, 117, 121, 124, 128, 120, 113, 105, 98, 93, 88, 83, 79, 76, 74,
+        71, 69, 67, 65, 63, 62, 60, 59, 57, 56, 55, 54, 53, 52, 52, 52, 52, 106,
+        110, 113, 117, 120, 114, 107, 101, 94, 90, 85, 81, 76, 74, 71, 69, 66,
+        64, 63, 61, 60, 58, 57, 56, 55, 54, 53, 52, 51, 51, 51, 51, 99, 102,
+        106, 110, 113, 108, 102, 96, 91, 86, 82, 78, 74, 71, 69, 66, 64, 62, 61,
+        59, 57, 56, 55, 54, 53, 52, 51, 50, 49, 49, 49, 49, 94, 97, 100, 104,
+        107, 102, 97, 92, 87, 83, 79, 76, 72, 69, 67, 64, 62, 61, 59, 58, 56,
+        55, 54, 53, 52, 51, 50, 49, 49, 49, 49, 49, 88, 92, 95, 98, 102, 97, 93,
+        89, 84, 81, 77, 73, 70, 67, 65, 63, 61, 59, 58, 56, 55, 54, 53, 52, 51,
+        50, 49, 49, 48, 48, 48, 48, 83, 86, 90, 93, 96, 92, 88, 85, 81, 78, 74,
+        71, 68, 66, 63, 61, 59, 58, 56, 55, 54, 53, 52, 51, 50, 49, 48, 48, 47,
+        47, 47, 47, 78, 81, 84, 87, 90, 87, 84, 81, 78, 75, 72, 69, 66, 64, 62,
+        60, 57, 56, 55, 54, 52, 51, 50, 50, 49, 48, 47, 47, 46, 46, 46, 46, 75,
+        78, 80, 83, 86, 83, 81, 78, 75, 72, 70, 67, 64, 62, 60, 58, 56, 55, 54,
+        53, 51, 51, 50, 49, 48, 47, 47, 46, 46, 46, 46, 46, 71, 74, 77, 79, 82,
+        80, 77, 75, 72, 70, 67, 65, 62, 61, 59, 57, 55, 54, 53, 52, 50, 50, 49,
+        48, 47, 47, 46, 45, 45, 45, 45, 45, 68, 71, 73, 76, 78, 76, 74, 72, 70,
+        67, 65, 63, 61, 59, 57, 56, 54, 53, 52, 51, 50, 49, 48, 47, 47, 46, 45,
+        45, 44, 44, 44, 44, 65, 67, 69, 72, 74, 72, 70, 69, 67, 65, 63, 61, 59,
+        58, 56, 55, 53, 52, 51, 50, 49, 48, 47, 47, 46, 45, 45, 44, 44, 44, 44,
+        44, 62, 65, 67, 69, 71, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 54, 52,
+        51, 50, 49, 48, 47, 47, 46, 45, 45, 44, 44, 43, 43, 43, 43, 60, 62, 64,
+        66, 68, 67, 66, 64, 63, 61, 60, 58, 57, 55, 54, 53, 51, 50, 49, 48, 47,
+        47, 46, 45, 45, 44, 44, 43, 43, 43, 43, 43, 58, 60, 62, 64, 66, 64, 63,
+        62, 61, 59, 58, 57, 55, 54, 53, 52, 50, 49, 49, 48, 47, 46, 45, 45, 44,
+        44, 43, 43, 42, 42, 42, 42, 56, 58, 59, 61, 63, 62, 61, 60, 59, 57, 56,
+        55, 54, 53, 52, 51, 49, 49, 48, 47, 46, 46, 45, 44, 44, 43, 43, 42, 42,
+        42, 42, 42, 56, 58, 59, 61, 63, 62, 61, 60, 59, 57, 56, 55, 54, 53, 52,
+        51, 49, 49, 48, 47, 46, 46, 45, 44, 44, 43, 43, 42, 42, 42, 42, 42, 56,
+        58, 59, 61, 63, 62, 61, 60, 59, 57, 56, 55, 54, 53, 52, 51, 49, 49, 48,
+        47, 46, 46, 45, 44, 44, 43, 43, 42, 42, 42, 42, 42, 56, 58, 59, 61, 63,
+        62, 61, 60, 59, 57, 56, 55, 54, 53, 52, 51, 49, 49, 48, 47, 46, 46, 45,
+        44, 44, 43, 43, 42, 42, 42, 42, 42 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 42, 40, 32, 42, 36, 34, 31, 40, 34, 28, 25, 32, 31, 25, 22,
+        /* Size 8 */
+        64, 78, 46, 43, 41, 37, 34, 31, 78, 56, 45, 49, 48, 44, 40, 36, 46, 45,
+        39, 40, 41, 39, 36, 33, 43, 49, 40, 36, 35, 34, 32, 30, 41, 48, 41, 35,
+        32, 30, 29, 28, 37, 44, 39, 34, 30, 28, 27, 26, 34, 40, 36, 32, 29, 27,
+        25, 24, 31, 36, 33, 30, 28, 26, 24, 23,
+        /* Size 16 */
+        64, 71, 78, 62, 46, 45, 43, 42, 41, 39, 37, 36, 34, 32, 31, 31, 71, 69,
+        67, 56, 45, 46, 46, 45, 44, 43, 41, 39, 37, 35, 33, 33, 78, 67, 56, 50,
+        45, 47, 49, 48, 48, 46, 44, 42, 40, 38, 36, 36, 62, 56, 50, 46, 42, 43,
+        45, 44, 44, 43, 41, 40, 38, 36, 35, 35, 46, 45, 45, 42, 39, 40, 40, 40,
+        41, 40, 39, 38, 36, 35, 33, 33, 45, 46, 47, 43, 40, 39, 38, 38, 38, 37,
+        36, 35, 34, 33, 32, 32, 43, 46, 49, 45, 40, 38, 36, 36, 35, 34, 34, 33,
+        32, 31, 30, 30, 42, 45, 48, 44, 40, 38, 36, 35, 34, 33, 32, 31, 31, 30,
+        29, 29, 41, 44, 48, 44, 41, 38, 35, 34, 32, 31, 30, 30, 29, 28, 28, 28,
+        39, 43, 46, 43, 40, 37, 34, 33, 31, 30, 29, 29, 28, 27, 27, 27, 37, 41,
+        44, 41, 39, 36, 34, 32, 30, 29, 28, 27, 27, 26, 26, 26, 36, 39, 42, 40,
+        38, 35, 33, 31, 30, 29, 27, 27, 26, 25, 25, 25, 34, 37, 40, 38, 36, 34,
+        32, 31, 29, 28, 27, 26, 25, 25, 24, 24, 32, 35, 38, 36, 35, 33, 31, 30,
+        28, 27, 26, 25, 25, 24, 23, 23, 31, 33, 36, 35, 33, 32, 30, 29, 28, 27,
+        26, 25, 24, 23, 23, 23, 31, 33, 36, 35, 33, 32, 30, 29, 28, 27, 26, 25,
+        24, 23, 23, 23,
+        /* Size 32 */
+        64, 67, 71, 74, 78, 70, 62, 54, 46, 45, 45, 44, 43, 43, 42, 42, 41, 40,
+        39, 38, 37, 36, 36, 35, 34, 33, 32, 31, 31, 31, 31, 31, 67, 69, 70, 71,
+        72, 66, 59, 52, 45, 45, 45, 45, 45, 44, 44, 43, 43, 42, 41, 40, 39, 38,
+        37, 36, 35, 34, 34, 33, 32, 32, 32, 32, 71, 70, 69, 68, 67, 61, 56, 51,
+        45, 46, 46, 46, 46, 46, 45, 45, 44, 43, 43, 42, 41, 40, 39, 38, 37, 36,
+        35, 34, 33, 33, 33, 33, 74, 71, 68, 65, 61, 57, 53, 49, 45, 46, 46, 47,
+        48, 47, 47, 46, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 34,
+        34, 34, 78, 72, 67, 61, 56, 53, 50, 48, 45, 46, 47, 48, 49, 49, 48, 48,
+        48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 36, 36, 36, 70, 66,
+        61, 57, 53, 51, 48, 46, 44, 44, 45, 46, 47, 47, 46, 46, 46, 45, 44, 44,
+        43, 42, 41, 40, 39, 38, 37, 36, 35, 35, 35, 35, 62, 59, 56, 53, 50, 48,
+        46, 44, 42, 43, 43, 44, 45, 45, 44, 44, 44, 43, 43, 42, 41, 41, 40, 39,
+        38, 37, 36, 35, 35, 35, 35, 35, 54, 52, 51, 49, 48, 46, 44, 42, 41, 41,
+        42, 42, 43, 43, 42, 42, 42, 42, 41, 41, 40, 39, 39, 38, 37, 36, 36, 35,
+        34, 34, 34, 34, 46, 45, 45, 45, 45, 44, 42, 41, 39, 40, 40, 40, 40, 40,
+        40, 41, 41, 40, 40, 39, 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 33, 33,
+        45, 45, 46, 46, 46, 44, 43, 41, 40, 40, 40, 39, 39, 39, 39, 39, 39, 39,
+        38, 38, 38, 37, 36, 36, 35, 35, 34, 33, 33, 33, 33, 33, 45, 45, 46, 46,
+        47, 45, 43, 42, 40, 40, 39, 39, 38, 38, 38, 38, 38, 37, 37, 37, 36, 36,
+        35, 35, 34, 34, 33, 32, 32, 32, 32, 32, 44, 45, 46, 47, 48, 46, 44, 42,
+        40, 39, 39, 38, 37, 37, 37, 37, 36, 36, 36, 35, 35, 35, 34, 34, 33, 33,
+        32, 32, 31, 31, 31, 31, 43, 45, 46, 48, 49, 47, 45, 43, 40, 39, 38, 37,
+        36, 36, 36, 35, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32, 31, 31, 30, 30,
+        30, 30, 43, 44, 46, 47, 49, 47, 45, 43, 40, 39, 38, 37, 36, 36, 35, 35,
+        34, 34, 34, 33, 33, 33, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 42, 44,
+        45, 47, 48, 46, 44, 42, 40, 39, 38, 37, 36, 35, 35, 34, 34, 33, 33, 32,
+        32, 32, 31, 31, 31, 30, 30, 29, 29, 29, 29, 29, 42, 43, 45, 46, 48, 46,
+        44, 42, 41, 39, 38, 37, 35, 35, 34, 33, 33, 32, 32, 32, 31, 31, 31, 30,
+        30, 29, 29, 29, 28, 28, 28, 28, 41, 43, 44, 46, 48, 46, 44, 42, 41, 39,
+        38, 36, 35, 34, 34, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28,
+        28, 28, 28, 28, 40, 42, 43, 45, 47, 45, 43, 42, 40, 39, 37, 36, 35, 34,
+        33, 32, 32, 31, 31, 30, 30, 29, 29, 29, 28, 28, 28, 28, 27, 27, 27, 27,
+        39, 41, 43, 44, 46, 44, 43, 41, 40, 38, 37, 36, 34, 34, 33, 32, 31, 31,
+        30, 30, 29, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27, 27, 38, 40, 42, 43,
+        45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32, 32, 31, 30, 30, 29, 29, 28,
+        28, 28, 27, 27, 27, 26, 26, 26, 26, 26, 37, 39, 41, 42, 44, 43, 41, 40,
+        39, 38, 36, 35, 34, 33, 32, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27, 26,
+        26, 26, 26, 26, 26, 26, 36, 38, 40, 41, 43, 42, 41, 39, 38, 37, 36, 35,
+        33, 33, 32, 31, 30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25,
+        25, 25, 36, 37, 39, 40, 42, 41, 40, 39, 38, 36, 35, 34, 33, 32, 31, 31,
+        30, 29, 29, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 25, 35, 36,
+        38, 39, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 29, 28, 28,
+        27, 27, 26, 26, 26, 25, 25, 25, 24, 24, 24, 24, 34, 35, 37, 38, 40, 39,
+        38, 37, 36, 35, 34, 33, 32, 31, 31, 30, 29, 28, 28, 27, 27, 26, 26, 26,
+        25, 25, 25, 24, 24, 24, 24, 24, 33, 34, 36, 37, 39, 38, 37, 36, 35, 35,
+        34, 33, 32, 31, 30, 29, 29, 28, 28, 27, 26, 26, 26, 25, 25, 25, 24, 24,
+        24, 24, 24, 24, 32, 34, 35, 36, 38, 37, 36, 36, 35, 34, 33, 32, 31, 31,
+        30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 25, 24, 24, 24, 23, 23, 23, 23,
+        31, 33, 34, 35, 37, 36, 35, 35, 34, 33, 32, 32, 31, 30, 29, 29, 28, 28,
+        27, 26, 26, 25, 25, 25, 24, 24, 24, 23, 23, 23, 23, 23, 31, 32, 33, 34,
+        36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 26, 25,
+        25, 24, 24, 24, 23, 23, 23, 23, 23, 23, 31, 32, 33, 34, 36, 35, 35, 34,
+        33, 33, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24,
+        23, 23, 23, 23, 23, 23, 31, 32, 33, 34, 36, 35, 35, 34, 33, 33, 32, 31,
+        30, 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 23,
+        23, 23, 31, 32, 33, 34, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
+        28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 23, 23, 23 },
+      { /* Intra matrices */
+        /* Size 4 */
+        133, 87, 80, 65, 87, 73, 69, 61, 80, 69, 56, 50, 65, 61, 50, 43,
+        /* Size 8 */
+        124, 152, 87, 83, 78, 70, 63, 57, 152, 107, 86, 94, 91, 84, 75, 67, 87,
+        86, 74, 77, 77, 73, 68, 62, 83, 94, 77, 69, 66, 63, 60, 56, 78, 91, 77,
+        66, 60, 56, 54, 51, 70, 84, 73, 63, 56, 52, 49, 47, 63, 75, 68, 60, 54,
+        49, 46, 44, 57, 67, 62, 56, 51, 47, 44, 41,
+        /* Size 16 */
+        127, 141, 155, 122, 89, 87, 84, 82, 80, 76, 72, 68, 65, 61, 58, 58, 141,
+        137, 133, 111, 88, 89, 90, 88, 87, 83, 79, 75, 71, 67, 63, 63, 155, 133,
+        110, 99, 88, 92, 96, 95, 93, 90, 86, 81, 77, 73, 69, 69, 122, 111, 99,
+        90, 82, 85, 87, 87, 86, 83, 80, 77, 73, 70, 66, 66, 89, 88, 88, 82, 76,
+        77, 78, 79, 79, 77, 75, 72, 70, 67, 64, 64, 87, 89, 92, 85, 77, 76, 74,
+        74, 73, 71, 70, 68, 66, 63, 61, 61, 84, 90, 96, 87, 78, 74, 70, 69, 67,
+        66, 65, 63, 62, 60, 58, 58, 82, 88, 95, 87, 79, 74, 69, 67, 64, 63, 61,
+        60, 58, 57, 55, 55, 80, 87, 93, 86, 79, 73, 67, 64, 61, 59, 58, 56, 55,
+        54, 52, 52, 76, 83, 90, 83, 77, 71, 66, 63, 59, 57, 55, 54, 53, 51, 50,
+        50, 72, 79, 86, 80, 75, 70, 65, 61, 58, 55, 53, 52, 50, 49, 48, 48, 68,
+        75, 81, 77, 72, 68, 63, 60, 56, 54, 52, 50, 49, 47, 46, 46, 65, 71, 77,
+        73, 70, 66, 62, 58, 55, 53, 50, 49, 47, 46, 45, 45, 61, 67, 73, 70, 67,
+        63, 60, 57, 54, 51, 49, 47, 46, 45, 43, 43, 58, 63, 69, 66, 64, 61, 58,
+        55, 52, 50, 48, 46, 45, 43, 42, 42, 58, 63, 69, 66, 64, 61, 58, 55, 52,
+        50, 48, 46, 45, 43, 42, 42,
+        /* Size 32 */
+        129, 136, 143, 150, 157, 141, 124, 107, 90, 89, 88, 87, 86, 84, 83, 82,
+        81, 79, 77, 75, 73, 71, 69, 67, 65, 64, 62, 60, 59, 59, 59, 59, 136,
+        138, 141, 143, 146, 132, 118, 104, 90, 90, 89, 89, 88, 87, 86, 85, 84,
+        82, 80, 78, 76, 74, 72, 71, 69, 67, 65, 63, 62, 62, 62, 62, 143, 141,
+        139, 137, 134, 123, 112, 101, 89, 90, 90, 91, 91, 90, 89, 89, 88, 86,
+        84, 82, 80, 78, 76, 74, 72, 70, 68, 66, 64, 64, 64, 64, 150, 143, 137,
+        130, 123, 114, 106, 98, 89, 90, 92, 93, 94, 93, 93, 92, 91, 89, 87, 85,
+        83, 81, 79, 77, 75, 73, 71, 69, 67, 67, 67, 67, 157, 146, 134, 123, 111,
+        106, 100, 94, 89, 91, 93, 95, 97, 97, 96, 95, 95, 93, 91, 89, 87, 85,
+        82, 80, 78, 76, 74, 72, 70, 70, 70, 70, 141, 132, 123, 114, 106, 101,
+        96, 91, 86, 88, 89, 91, 93, 92, 92, 91, 91, 89, 87, 86, 84, 82, 80, 78,
+        76, 74, 72, 70, 68, 68, 68, 68, 124, 118, 112, 106, 100, 96, 91, 87, 83,
+        84, 86, 87, 88, 88, 88, 87, 87, 86, 84, 83, 81, 80, 78, 76, 74, 72, 71,
+        69, 67, 67, 67, 67, 107, 104, 101, 98, 94, 91, 87, 84, 80, 81, 82, 83,
+        84, 84, 84, 83, 83, 82, 81, 80, 79, 77, 76, 74, 72, 71, 69, 68, 66, 66,
+        66, 66, 90, 90, 89, 89, 89, 86, 83, 80, 77, 78, 78, 79, 79, 79, 80, 80,
+        80, 79, 78, 77, 76, 75, 73, 72, 71, 69, 68, 66, 65, 65, 65, 65, 89, 90,
+        90, 90, 91, 88, 84, 81, 78, 78, 77, 77, 77, 77, 77, 77, 77, 76, 75, 74,
+        73, 72, 71, 70, 69, 67, 66, 64, 63, 63, 63, 63, 88, 89, 90, 92, 93, 89,
+        86, 82, 78, 77, 77, 76, 75, 75, 75, 74, 74, 73, 72, 72, 71, 70, 69, 68,
+        66, 65, 64, 63, 62, 62, 62, 62, 87, 89, 91, 93, 95, 91, 87, 83, 79, 77,
+        76, 75, 73, 73, 72, 72, 71, 70, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61,
+        60, 60, 60, 60, 86, 88, 91, 94, 97, 93, 88, 84, 79, 77, 75, 73, 71, 70,
+        70, 69, 68, 68, 67, 66, 66, 65, 64, 63, 62, 61, 60, 59, 59, 59, 59, 59,
+        84, 87, 90, 93, 97, 92, 88, 84, 79, 77, 75, 73, 70, 69, 68, 68, 67, 66,
+        65, 65, 64, 63, 62, 61, 61, 60, 59, 58, 57, 57, 57, 57, 83, 86, 89, 93,
+        96, 92, 88, 84, 80, 77, 75, 72, 70, 68, 67, 66, 65, 64, 64, 63, 62, 61,
+        60, 60, 59, 58, 57, 57, 56, 56, 56, 56, 82, 85, 89, 92, 95, 91, 87, 83,
+        80, 77, 74, 72, 69, 68, 66, 65, 63, 63, 62, 61, 60, 59, 59, 58, 57, 57,
+        56, 55, 54, 54, 54, 54, 81, 84, 88, 91, 95, 91, 87, 83, 80, 77, 74, 71,
+        68, 67, 65, 63, 62, 61, 60, 59, 58, 58, 57, 56, 56, 55, 54, 54, 53, 53,
+        53, 53, 79, 82, 86, 89, 93, 89, 86, 82, 79, 76, 73, 70, 68, 66, 64, 63,
+        61, 60, 59, 58, 57, 57, 56, 55, 54, 54, 53, 52, 52, 52, 52, 52, 77, 80,
+        84, 87, 91, 87, 84, 81, 78, 75, 72, 70, 67, 65, 64, 62, 60, 59, 58, 57,
+        56, 55, 55, 54, 53, 53, 52, 51, 51, 51, 51, 51, 75, 78, 82, 85, 89, 86,
+        83, 80, 77, 74, 72, 69, 66, 65, 63, 61, 59, 58, 57, 56, 55, 54, 53, 53,
+        52, 51, 51, 50, 50, 50, 50, 50, 73, 76, 80, 83, 87, 84, 81, 79, 76, 73,
+        71, 68, 66, 64, 62, 60, 58, 57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49,
+        48, 48, 48, 48, 71, 74, 78, 81, 85, 82, 80, 77, 75, 72, 70, 67, 65, 63,
+        61, 59, 58, 57, 55, 54, 53, 52, 52, 51, 50, 49, 49, 48, 48, 48, 48, 48,
+        69, 72, 76, 79, 82, 80, 78, 76, 73, 71, 69, 66, 64, 62, 60, 59, 57, 56,
+        55, 53, 52, 52, 51, 50, 49, 49, 48, 47, 47, 47, 47, 47, 67, 71, 74, 77,
+        80, 78, 76, 74, 72, 70, 68, 65, 63, 61, 60, 58, 56, 55, 54, 53, 52, 51,
+        50, 49, 48, 48, 47, 47, 46, 46, 46, 46, 65, 69, 72, 75, 78, 76, 74, 72,
+        71, 69, 66, 64, 62, 61, 59, 57, 56, 54, 53, 52, 51, 50, 49, 48, 48, 47,
+        46, 46, 45, 45, 45, 45, 64, 67, 70, 73, 76, 74, 72, 71, 69, 67, 65, 63,
+        61, 60, 58, 57, 55, 54, 53, 51, 50, 49, 49, 48, 47, 46, 46, 45, 45, 45,
+        45, 45, 62, 65, 68, 71, 74, 72, 71, 69, 68, 66, 64, 62, 60, 59, 57, 56,
+        54, 53, 52, 51, 50, 49, 48, 47, 46, 46, 45, 45, 44, 44, 44, 44, 60, 63,
+        66, 69, 72, 70, 69, 68, 66, 64, 63, 61, 59, 58, 57, 55, 54, 52, 51, 50,
+        49, 48, 47, 47, 46, 45, 45, 44, 43, 43, 43, 43, 59, 62, 64, 67, 70, 68,
+        67, 66, 65, 63, 62, 60, 59, 57, 56, 54, 53, 52, 51, 50, 48, 48, 47, 46,
+        45, 45, 44, 43, 43, 43, 43, 43, 59, 62, 64, 67, 70, 68, 67, 66, 65, 63,
+        62, 60, 59, 57, 56, 54, 53, 52, 51, 50, 48, 48, 47, 46, 45, 45, 44, 43,
+        43, 43, 43, 43, 59, 62, 64, 67, 70, 68, 67, 66, 65, 63, 62, 60, 59, 57,
+        56, 54, 53, 52, 51, 50, 48, 48, 47, 46, 45, 45, 44, 43, 43, 43, 43, 43,
+        59, 62, 64, 67, 70, 68, 67, 66, 65, 63, 62, 60, 59, 57, 56, 54, 53, 52,
+        51, 50, 48, 48, 47, 46, 45, 45, 44, 43, 43, 43, 43, 43 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 59, 40, 29, 59, 42, 33, 27, 40, 33, 26, 23, 29, 27, 23, 21,
+        /* Size 8 */
+        64, 82, 76, 60, 48, 39, 34, 30, 82, 73, 75, 66, 54, 44, 38, 33, 76, 75,
+        57, 50, 44, 39, 35, 31, 60, 66, 50, 42, 37, 34, 31, 29, 48, 54, 44, 37,
+        33, 31, 29, 27, 39, 44, 39, 34, 31, 29, 27, 26, 34, 38, 35, 31, 29, 27,
+        26, 25, 30, 33, 31, 29, 27, 26, 25, 24,
+        /* Size 16 */
+        64, 73, 82, 79, 76, 68, 60, 54, 48, 44, 39, 36, 34, 32, 30, 30, 73, 75,
+        77, 76, 76, 69, 63, 57, 51, 46, 42, 39, 36, 34, 31, 31, 82, 77, 73, 74,
+        75, 71, 66, 60, 54, 49, 44, 41, 38, 35, 33, 33, 79, 76, 74, 70, 66, 62,
+        58, 54, 49, 45, 42, 39, 36, 34, 32, 32, 76, 76, 75, 66, 57, 53, 50, 47,
+        44, 42, 39, 37, 35, 33, 31, 31, 68, 69, 71, 62, 53, 50, 46, 43, 41, 39,
+        37, 35, 33, 32, 30, 30, 60, 63, 66, 58, 50, 46, 42, 40, 37, 36, 34, 33,
+        31, 30, 29, 29, 54, 57, 60, 54, 47, 43, 40, 37, 35, 34, 32, 31, 30, 29,
+        28, 28, 48, 51, 54, 49, 44, 41, 37, 35, 33, 32, 31, 30, 29, 28, 27, 27,
+        44, 46, 49, 45, 42, 39, 36, 34, 32, 31, 30, 29, 28, 27, 27, 27, 39, 42,
+        44, 42, 39, 37, 34, 32, 31, 30, 29, 28, 27, 27, 26, 26, 36, 39, 41, 39,
+        37, 35, 33, 31, 30, 29, 28, 27, 26, 26, 25, 25, 34, 36, 38, 36, 35, 33,
+        31, 30, 29, 28, 27, 26, 26, 25, 25, 25, 32, 34, 35, 34, 33, 32, 30, 29,
+        28, 27, 27, 26, 25, 25, 25, 25, 30, 31, 33, 32, 31, 30, 29, 28, 27, 27,
+        26, 25, 25, 25, 24, 24, 30, 31, 33, 32, 31, 30, 29, 28, 27, 27, 26, 25,
+        25, 25, 24, 24,
+        /* Size 32 */
+        64, 68, 73, 77, 82, 80, 79, 78, 76, 72, 68, 64, 60, 57, 54, 51, 48, 46,
+        44, 41, 39, 38, 36, 35, 34, 33, 32, 31, 30, 30, 30, 30, 68, 71, 74, 77,
+        79, 79, 78, 77, 76, 72, 69, 65, 62, 59, 56, 52, 49, 47, 45, 43, 41, 39,
+        38, 36, 35, 34, 33, 32, 31, 31, 31, 31, 73, 74, 75, 76, 77, 77, 76, 76,
+        76, 73, 69, 66, 63, 60, 57, 54, 51, 49, 46, 44, 42, 40, 39, 37, 36, 35,
+        34, 32, 31, 31, 31, 31, 77, 77, 76, 75, 75, 75, 75, 75, 76, 73, 70, 67,
+        64, 61, 58, 55, 52, 50, 48, 45, 43, 41, 40, 38, 37, 35, 34, 33, 32, 32,
+        32, 32, 82, 79, 77, 75, 73, 73, 74, 75, 75, 73, 71, 68, 66, 63, 60, 57,
+        54, 51, 49, 47, 44, 43, 41, 39, 38, 36, 35, 34, 33, 33, 33, 33, 80, 79,
+        77, 75, 73, 73, 72, 71, 71, 68, 66, 64, 62, 59, 57, 54, 51, 49, 47, 45,
+        43, 41, 40, 38, 37, 36, 35, 34, 32, 32, 32, 32, 79, 78, 76, 75, 74, 72,
+        70, 68, 66, 64, 62, 60, 58, 56, 54, 51, 49, 47, 45, 44, 42, 40, 39, 37,
+        36, 35, 34, 33, 32, 32, 32, 32, 78, 77, 76, 75, 75, 71, 68, 65, 61, 60,
+        58, 56, 54, 52, 50, 49, 47, 45, 44, 42, 40, 39, 38, 37, 35, 34, 33, 33,
+        32, 32, 32, 32, 76, 76, 76, 76, 75, 71, 66, 61, 57, 55, 53, 52, 50, 49,
+        47, 46, 44, 43, 42, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 31, 31, 31,
+        72, 72, 73, 73, 73, 68, 64, 60, 55, 53, 52, 50, 48, 47, 45, 44, 43, 41,
+        40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 31, 31, 31, 31, 68, 69, 69, 70,
+        71, 66, 62, 58, 53, 52, 50, 48, 46, 45, 43, 42, 41, 40, 39, 38, 37, 36,
+        35, 34, 33, 32, 32, 31, 30, 30, 30, 30, 64, 65, 66, 67, 68, 64, 60, 56,
+        52, 50, 48, 46, 44, 43, 41, 40, 39, 38, 37, 36, 35, 35, 34, 33, 32, 32,
+        31, 30, 30, 30, 30, 30, 60, 62, 63, 64, 66, 62, 58, 54, 50, 48, 46, 44,
+        42, 41, 40, 38, 37, 37, 36, 35, 34, 33, 33, 32, 31, 31, 30, 30, 29, 29,
+        29, 29, 57, 59, 60, 61, 63, 59, 56, 52, 49, 47, 45, 43, 41, 40, 38, 37,
+        36, 36, 35, 34, 33, 33, 32, 31, 31, 30, 30, 29, 29, 29, 29, 29, 54, 56,
+        57, 58, 60, 57, 54, 50, 47, 45, 43, 41, 40, 38, 37, 36, 35, 35, 34, 33,
+        32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 28, 28, 51, 52, 54, 55, 57, 54,
+        51, 49, 46, 44, 42, 40, 38, 37, 36, 35, 34, 34, 33, 32, 32, 31, 31, 30,
+        30, 29, 29, 28, 28, 28, 28, 28, 48, 49, 51, 52, 54, 51, 49, 47, 44, 43,
+        41, 39, 37, 36, 35, 34, 33, 33, 32, 31, 31, 30, 30, 29, 29, 28, 28, 28,
+        27, 27, 27, 27, 46, 47, 49, 50, 51, 49, 47, 45, 43, 41, 40, 38, 37, 36,
+        35, 34, 33, 32, 31, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27,
+        44, 45, 46, 48, 49, 47, 45, 44, 42, 40, 39, 37, 36, 35, 34, 33, 32, 31,
+        31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27, 27, 41, 43, 44, 45,
+        47, 45, 44, 42, 40, 39, 38, 36, 35, 34, 33, 32, 31, 31, 30, 30, 29, 29,
+        28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 39, 41, 42, 43, 44, 43, 42, 40,
+        39, 38, 37, 35, 34, 33, 32, 32, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27,
+        27, 26, 26, 26, 26, 26, 38, 39, 40, 41, 43, 41, 40, 39, 38, 37, 36, 35,
+        33, 33, 32, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27, 27, 26, 26, 26, 26,
+        26, 26, 36, 38, 39, 40, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 31,
+        30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 25, 25, 25, 35, 36,
+        37, 38, 39, 38, 37, 37, 36, 35, 34, 33, 32, 31, 31, 30, 29, 29, 28, 28,
+        27, 27, 27, 26, 26, 26, 26, 25, 25, 25, 25, 25, 34, 35, 36, 37, 38, 37,
+        36, 35, 35, 34, 33, 32, 31, 31, 30, 30, 29, 28, 28, 28, 27, 27, 26, 26,
+        26, 26, 25, 25, 25, 25, 25, 25, 33, 34, 35, 35, 36, 36, 35, 34, 34, 33,
+        32, 32, 31, 30, 30, 29, 28, 28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25,
+        25, 25, 25, 25, 32, 33, 34, 34, 35, 35, 34, 33, 33, 32, 32, 31, 30, 30,
+        29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 25, 25, 25,
+        31, 32, 32, 33, 34, 34, 33, 33, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27,
+        27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 24, 24, 24, 24, 30, 31, 31, 32,
+        33, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26,
+        25, 25, 25, 25, 25, 24, 24, 24, 24, 24, 30, 31, 31, 32, 33, 32, 32, 32,
+        31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25, 25,
+        25, 24, 24, 24, 24, 24, 30, 31, 31, 32, 33, 32, 32, 32, 31, 31, 30, 30,
+        29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 24, 24, 24,
+        24, 24, 30, 31, 31, 32, 33, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28,
+        27, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 24, 24, 24, 24, 24 },
+      { /* Intra matrices */
+        /* Size 4 */
+        141, 130, 85, 60, 130, 91, 70, 56, 85, 70, 55, 48, 60, 56, 48, 43,
+        /* Size 8 */
+        120, 155, 144, 113, 88, 72, 61, 53, 155, 137, 143, 124, 100, 82, 68, 59,
+        144, 143, 106, 93, 82, 71, 63, 56, 113, 124, 93, 77, 68, 62, 56, 52, 88,
+        100, 82, 68, 60, 55, 51, 48, 72, 82, 71, 62, 55, 51, 48, 46, 61, 68, 63,
+        56, 51, 48, 45, 44, 53, 59, 56, 52, 48, 46, 44, 42,
+        /* Size 16 */
+        124, 142, 160, 154, 149, 133, 117, 104, 91, 82, 74, 68, 63, 59, 55, 55,
+        142, 146, 150, 149, 148, 135, 122, 110, 97, 88, 79, 73, 66, 62, 58, 58,
+        160, 150, 141, 144, 147, 137, 128, 115, 103, 94, 84, 77, 70, 66, 61, 61,
+        154, 149, 144, 136, 128, 120, 112, 103, 94, 86, 79, 73, 67, 63, 59, 59,
+        149, 148, 147, 128, 109, 103, 96, 90, 84, 79, 73, 69, 64, 61, 58, 58,
+        133, 135, 137, 120, 103, 95, 87, 82, 77, 73, 68, 65, 61, 58, 56, 56,
+        117, 122, 128, 112, 96, 87, 79, 74, 70, 67, 63, 61, 58, 56, 53, 53, 104,
+        110, 115, 103, 90, 82, 74, 70, 66, 63, 60, 58, 55, 53, 52, 52, 91, 97,
+        103, 94, 84, 77, 70, 66, 62, 59, 57, 55, 53, 51, 50, 50, 82, 88, 94, 86,
+        79, 73, 67, 63, 59, 57, 54, 53, 51, 50, 48, 48, 74, 79, 84, 79, 73, 68,
+        63, 60, 57, 54, 52, 51, 49, 48, 47, 47, 68, 73, 77, 73, 69, 65, 61, 58,
+        55, 53, 51, 49, 48, 47, 46, 46, 63, 66, 70, 67, 64, 61, 58, 55, 53, 51,
+        49, 48, 47, 46, 45, 45, 59, 62, 66, 63, 61, 58, 56, 53, 51, 50, 48, 47,
+        46, 45, 44, 44, 55, 58, 61, 59, 58, 56, 53, 52, 50, 48, 47, 46, 45, 44,
+        44, 44, 55, 58, 61, 59, 58, 56, 53, 52, 50, 48, 47, 46, 45, 44, 44, 44,
+        /* Size 32 */
+        126, 135, 144, 153, 162, 159, 157, 154, 151, 143, 135, 127, 118, 112,
+        105, 99, 92, 88, 84, 79, 75, 72, 69, 66, 63, 62, 60, 58, 56, 56, 56, 56,
+        135, 141, 146, 152, 157, 156, 154, 152, 151, 143, 136, 129, 121, 115,
+        108, 102, 96, 91, 87, 82, 78, 75, 71, 68, 65, 63, 61, 59, 57, 57, 57,
+        57, 144, 146, 148, 151, 153, 152, 151, 151, 150, 144, 137, 130, 124,
+        118, 111, 105, 99, 94, 89, 85, 80, 77, 74, 71, 67, 65, 63, 61, 59, 59,
+        59, 59, 153, 152, 151, 149, 148, 148, 149, 149, 150, 144, 138, 132, 127,
+        120, 114, 108, 102, 97, 92, 87, 83, 79, 76, 73, 69, 67, 65, 63, 60, 60,
+        60, 60, 162, 157, 153, 148, 143, 145, 146, 148, 149, 144, 139, 134, 130,
+        123, 117, 111, 105, 100, 95, 90, 85, 82, 78, 75, 71, 69, 67, 64, 62, 62,
+        62, 62, 159, 156, 152, 148, 145, 143, 142, 141, 139, 135, 130, 126, 121,
+        116, 111, 105, 100, 96, 91, 87, 83, 79, 76, 73, 70, 68, 65, 63, 61, 61,
+        61, 61, 157, 154, 151, 149, 146, 142, 138, 134, 130, 126, 122, 118, 113,
+        109, 104, 100, 95, 91, 88, 84, 80, 77, 74, 71, 68, 66, 64, 62, 60, 60,
+        60, 60, 154, 152, 151, 149, 148, 141, 134, 127, 120, 117, 113, 109, 105,
+        102, 98, 94, 90, 87, 84, 81, 77, 75, 72, 69, 67, 65, 63, 61, 59, 59, 59,
+        59, 151, 151, 150, 150, 149, 139, 130, 120, 111, 108, 104, 101, 97, 94,
+        91, 89, 86, 83, 80, 77, 75, 72, 70, 68, 65, 64, 62, 60, 58, 58, 58, 58,
+        143, 143, 144, 144, 144, 135, 126, 117, 108, 104, 100, 97, 93, 90, 87,
+        85, 82, 79, 77, 74, 72, 70, 68, 66, 64, 62, 61, 59, 57, 57, 57, 57, 135,
+        136, 137, 138, 139, 130, 122, 113, 104, 100, 96, 93, 89, 86, 84, 81, 78,
+        76, 74, 72, 69, 68, 66, 64, 62, 61, 59, 58, 56, 56, 56, 56, 127, 129,
+        130, 132, 134, 126, 118, 109, 101, 97, 93, 88, 84, 82, 80, 77, 75, 73,
+        71, 69, 67, 65, 64, 62, 60, 59, 58, 57, 55, 55, 55, 55, 118, 121, 124,
+        127, 130, 121, 113, 105, 97, 93, 89, 84, 80, 78, 76, 73, 71, 69, 68, 66,
+        64, 63, 62, 60, 59, 58, 57, 55, 54, 54, 54, 54, 112, 115, 118, 120, 123,
+        116, 109, 102, 94, 90, 86, 82, 78, 76, 73, 71, 69, 67, 66, 64, 63, 61,
+        60, 59, 58, 56, 55, 54, 53, 53, 53, 53, 105, 108, 111, 114, 117, 111,
+        104, 98, 91, 87, 84, 80, 76, 73, 71, 69, 67, 65, 64, 62, 61, 60, 59, 57,
+        56, 55, 54, 53, 52, 52, 52, 52, 99, 102, 105, 108, 111, 105, 100, 94,
+        89, 85, 81, 77, 73, 71, 69, 67, 65, 63, 62, 60, 59, 58, 57, 56, 55, 54,
+        53, 52, 51, 51, 51, 51, 92, 96, 99, 102, 105, 100, 95, 90, 86, 82, 78,
+        75, 71, 69, 67, 65, 63, 61, 60, 59, 57, 56, 55, 55, 54, 53, 52, 51, 51,
+        51, 51, 51, 88, 91, 94, 97, 100, 96, 91, 87, 83, 79, 76, 73, 69, 67, 65,
+        63, 61, 60, 59, 57, 56, 55, 54, 54, 53, 52, 51, 51, 50, 50, 50, 50, 84,
+        87, 89, 92, 95, 91, 88, 84, 80, 77, 74, 71, 68, 66, 64, 62, 60, 59, 58,
+        56, 55, 54, 53, 53, 52, 51, 50, 50, 49, 49, 49, 49, 79, 82, 85, 87, 90,
+        87, 84, 81, 77, 74, 72, 69, 66, 64, 62, 60, 59, 57, 56, 55, 54, 53, 52,
+        52, 51, 50, 50, 49, 48, 48, 48, 48, 75, 78, 80, 83, 85, 83, 80, 77, 75,
+        72, 69, 67, 64, 63, 61, 59, 57, 56, 55, 54, 53, 52, 51, 51, 50, 49, 49,
+        48, 48, 48, 48, 48, 72, 75, 77, 79, 82, 79, 77, 75, 72, 70, 68, 65, 63,
+        61, 60, 58, 56, 55, 54, 53, 52, 51, 51, 50, 49, 49, 48, 48, 47, 47, 47,
+        47, 69, 71, 74, 76, 78, 76, 74, 72, 70, 68, 66, 64, 62, 60, 59, 57, 55,
+        54, 53, 52, 51, 51, 50, 49, 49, 48, 48, 47, 47, 47, 47, 47, 66, 68, 71,
+        73, 75, 73, 71, 69, 68, 66, 64, 62, 60, 59, 57, 56, 55, 54, 53, 52, 51,
+        50, 49, 49, 48, 48, 47, 47, 46, 46, 46, 46, 63, 65, 67, 69, 71, 70, 68,
+        67, 65, 64, 62, 60, 59, 58, 56, 55, 54, 53, 52, 51, 50, 49, 49, 48, 47,
+        47, 47, 46, 46, 46, 46, 46, 62, 63, 65, 67, 69, 68, 66, 65, 64, 62, 61,
+        59, 58, 56, 55, 54, 53, 52, 51, 50, 49, 49, 48, 48, 47, 47, 46, 46, 45,
+        45, 45, 45, 60, 61, 63, 65, 67, 65, 64, 63, 62, 61, 59, 58, 57, 55, 54,
+        53, 52, 51, 50, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45, 45, 58,
+        59, 61, 63, 64, 63, 62, 61, 60, 59, 58, 57, 55, 54, 53, 52, 51, 51, 50,
+        49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45, 45, 45, 56, 57, 59, 60, 62,
+        61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 51, 50, 49, 48, 48, 47, 47,
+        46, 46, 45, 45, 45, 44, 44, 44, 44, 56, 57, 59, 60, 62, 61, 60, 59, 58,
+        57, 56, 55, 54, 53, 52, 51, 51, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45,
+        45, 44, 44, 44, 44, 56, 57, 59, 60, 62, 61, 60, 59, 58, 57, 56, 55, 54,
+        53, 52, 51, 51, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 44, 44, 44,
+        44, 56, 57, 59, 60, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 51,
+        50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 44, 44, 44, 44 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 44, 41, 34, 44, 38, 36, 33, 41, 36, 30, 27, 34, 33, 27, 25,
+        /* Size 8 */
+        64, 77, 47, 45, 42, 39, 36, 33, 77, 56, 46, 50, 49, 45, 41, 37, 47, 46,
+        41, 42, 42, 40, 38, 35, 45, 50, 42, 38, 37, 36, 34, 32, 42, 49, 42, 37,
+        34, 32, 31, 30, 39, 45, 40, 36, 32, 30, 29, 28, 36, 41, 38, 34, 31, 29,
+        27, 26, 33, 37, 35, 32, 30, 28, 26, 25,
+        /* Size 16 */
+        64, 71, 77, 62, 47, 46, 45, 43, 42, 41, 39, 37, 36, 34, 33, 33, 71, 69,
+        67, 57, 46, 47, 47, 46, 46, 44, 42, 40, 38, 37, 35, 35, 77, 67, 56, 51,
+        46, 48, 50, 49, 49, 47, 45, 43, 41, 39, 37, 37, 62, 57, 51, 47, 43, 45,
+        46, 46, 45, 44, 43, 41, 40, 38, 36, 36, 47, 46, 46, 43, 41, 41, 42, 42,
+        42, 41, 40, 39, 38, 37, 35, 35, 46, 47, 48, 45, 41, 41, 40, 40, 39, 39,
+        38, 37, 36, 35, 34, 34, 45, 47, 50, 46, 42, 40, 38, 37, 37, 36, 36, 35,
+        34, 33, 32, 32, 43, 46, 49, 46, 42, 40, 37, 36, 35, 35, 34, 33, 33, 32,
+        31, 31, 42, 46, 49, 45, 42, 39, 37, 35, 34, 33, 32, 32, 31, 30, 30, 30,
+        41, 44, 47, 44, 41, 39, 36, 35, 33, 32, 31, 31, 30, 29, 29, 29, 39, 42,
+        45, 43, 40, 38, 36, 34, 32, 31, 30, 30, 29, 28, 28, 28, 37, 40, 43, 41,
+        39, 37, 35, 33, 32, 31, 30, 29, 28, 28, 27, 27, 36, 38, 41, 40, 38, 36,
+        34, 33, 31, 30, 29, 28, 27, 27, 26, 26, 34, 37, 39, 38, 37, 35, 33, 32,
+        30, 29, 28, 28, 27, 26, 26, 26, 33, 35, 37, 36, 35, 34, 32, 31, 30, 29,
+        28, 27, 26, 26, 25, 25, 33, 35, 37, 36, 35, 34, 32, 31, 30, 29, 28, 27,
+        26, 26, 25, 25,
+        /* Size 32 */
+        64, 67, 71, 74, 77, 69, 62, 54, 47, 46, 46, 45, 45, 44, 43, 43, 42, 42,
+        41, 40, 39, 38, 37, 36, 36, 35, 34, 33, 33, 33, 33, 33, 67, 68, 70, 71,
+        72, 66, 59, 53, 47, 46, 46, 46, 46, 45, 45, 44, 44, 43, 42, 41, 40, 40,
+        39, 38, 37, 36, 35, 35, 34, 34, 34, 34, 71, 70, 69, 68, 67, 62, 57, 51,
+        46, 47, 47, 47, 47, 47, 46, 46, 46, 45, 44, 43, 42, 41, 40, 39, 38, 38,
+        37, 36, 35, 35, 35, 35, 74, 71, 68, 65, 61, 58, 54, 50, 46, 47, 47, 48,
+        49, 48, 48, 47, 47, 46, 45, 44, 44, 43, 42, 41, 40, 39, 38, 37, 36, 36,
+        36, 36, 77, 72, 67, 61, 56, 54, 51, 49, 46, 47, 48, 49, 50, 50, 49, 49,
+        49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 37, 37, 37, 69, 66,
+        62, 58, 54, 51, 49, 47, 45, 46, 46, 47, 48, 48, 47, 47, 47, 46, 45, 45,
+        44, 43, 42, 41, 40, 39, 39, 38, 37, 37, 37, 37, 62, 59, 57, 54, 51, 49,
+        47, 45, 43, 44, 45, 45, 46, 46, 46, 45, 45, 45, 44, 43, 43, 42, 41, 40,
+        40, 39, 38, 37, 36, 36, 36, 36, 54, 53, 51, 50, 49, 47, 45, 44, 42, 43,
+        43, 43, 44, 44, 44, 44, 44, 43, 43, 42, 42, 41, 40, 39, 39, 38, 37, 36,
+        36, 36, 36, 36, 47, 47, 46, 46, 46, 45, 43, 42, 41, 41, 41, 42, 42, 42,
+        42, 42, 42, 42, 41, 41, 40, 40, 39, 38, 38, 37, 37, 36, 35, 35, 35, 35,
+        46, 46, 47, 47, 47, 46, 44, 43, 41, 41, 41, 41, 41, 41, 41, 41, 41, 40,
+        40, 40, 39, 39, 38, 37, 37, 36, 36, 35, 34, 34, 34, 34, 46, 46, 47, 47,
+        48, 46, 45, 43, 41, 41, 41, 40, 40, 40, 40, 40, 39, 39, 39, 38, 38, 37,
+        37, 36, 36, 35, 35, 34, 34, 34, 34, 34, 45, 46, 47, 48, 49, 47, 45, 43,
+        42, 41, 40, 40, 39, 39, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 35, 35,
+        34, 34, 33, 33, 33, 33, 45, 46, 47, 49, 50, 48, 46, 44, 42, 41, 40, 39,
+        38, 38, 37, 37, 37, 36, 36, 36, 36, 35, 35, 35, 34, 34, 33, 33, 32, 32,
+        32, 32, 44, 45, 47, 48, 50, 48, 46, 44, 42, 41, 40, 39, 38, 37, 37, 36,
+        36, 36, 35, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32, 32, 32, 32, 43, 45,
+        46, 48, 49, 47, 46, 44, 42, 41, 40, 39, 37, 37, 36, 36, 35, 35, 35, 34,
+        34, 34, 33, 33, 33, 32, 32, 32, 31, 31, 31, 31, 43, 44, 46, 47, 49, 47,
+        45, 44, 42, 41, 40, 38, 37, 36, 36, 35, 35, 34, 34, 34, 33, 33, 33, 32,
+        32, 32, 31, 31, 31, 31, 31, 31, 42, 44, 46, 47, 49, 47, 45, 44, 42, 41,
+        39, 38, 37, 36, 35, 35, 34, 34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30,
+        30, 30, 30, 30, 42, 43, 45, 46, 48, 46, 45, 43, 42, 40, 39, 38, 36, 36,
+        35, 34, 34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 29,
+        41, 42, 44, 45, 47, 45, 44, 43, 41, 40, 39, 37, 36, 35, 35, 34, 33, 33,
+        32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 29, 29, 29, 40, 41, 43, 44,
+        46, 45, 43, 42, 41, 40, 38, 37, 36, 35, 34, 34, 33, 32, 32, 31, 31, 30,
+        30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 39, 40, 42, 44, 45, 44, 43, 42,
+        40, 39, 38, 37, 36, 35, 34, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29,
+        28, 28, 28, 28, 28, 28, 38, 40, 41, 43, 44, 43, 42, 41, 40, 39, 37, 36,
+        35, 34, 34, 33, 32, 32, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28,
+        28, 28, 37, 39, 40, 42, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 33,
+        32, 31, 31, 30, 30, 29, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27, 36, 38,
+        39, 41, 42, 41, 40, 39, 38, 37, 36, 36, 35, 34, 33, 32, 31, 31, 30, 30,
+        29, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27, 27, 36, 37, 38, 40, 41, 40,
+        40, 39, 38, 37, 36, 35, 34, 33, 33, 32, 31, 31, 30, 29, 29, 29, 28, 28,
+        27, 27, 27, 27, 26, 26, 26, 26, 35, 36, 38, 39, 40, 39, 39, 38, 37, 36,
+        35, 35, 34, 33, 32, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 26,
+        26, 26, 26, 26, 34, 35, 37, 38, 39, 39, 38, 37, 37, 36, 35, 34, 33, 33,
+        32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 26, 26,
+        33, 35, 36, 37, 38, 38, 37, 36, 36, 35, 34, 34, 33, 32, 32, 31, 30, 30,
+        29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 26, 26, 26, 26, 33, 34, 35, 36,
+        37, 37, 36, 36, 35, 34, 34, 33, 32, 32, 31, 31, 30, 29, 29, 28, 28, 28,
+        27, 27, 26, 26, 26, 26, 25, 25, 25, 25, 33, 34, 35, 36, 37, 37, 36, 36,
+        35, 34, 34, 33, 32, 32, 31, 31, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26,
+        26, 26, 25, 25, 25, 25, 33, 34, 35, 36, 37, 37, 36, 36, 35, 34, 34, 33,
+        32, 32, 31, 31, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 26, 26, 25, 25,
+        25, 25, 33, 34, 35, 36, 37, 37, 36, 36, 35, 34, 34, 33, 32, 32, 31, 31,
+        30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 26, 26, 25, 25, 25, 25 },
+      { /* Intra matrices */
+        /* Size 4 */
+        125, 84, 78, 64, 84, 72, 68, 61, 78, 68, 56, 51, 64, 61, 51, 45,
+        /* Size 8 */
+        117, 141, 84, 80, 76, 69, 63, 57, 141, 102, 83, 90, 88, 81, 73, 66, 84,
+        83, 73, 75, 75, 72, 67, 62, 80, 90, 75, 68, 65, 63, 60, 57, 76, 88, 75,
+        65, 60, 57, 54, 52, 69, 81, 72, 63, 57, 53, 50, 48, 63, 73, 67, 60, 54,
+        50, 47, 45, 57, 66, 62, 57, 52, 48, 45, 43,
+        /* Size 16 */
+        119, 132, 144, 115, 86, 84, 82, 79, 77, 74, 71, 67, 64, 61, 58, 58, 132,
+        128, 124, 105, 85, 86, 87, 85, 83, 80, 77, 73, 69, 66, 63, 63, 144, 124,
+        104, 94, 84, 88, 92, 91, 89, 86, 83, 79, 75, 71, 68, 68, 115, 105, 94,
+        87, 79, 82, 84, 83, 83, 80, 78, 75, 72, 69, 65, 65, 86, 85, 84, 79, 74,
+        75, 76, 76, 76, 75, 73, 71, 68, 66, 63, 63, 84, 86, 88, 82, 75, 74, 73,
+        72, 71, 70, 69, 67, 65, 63, 61, 61, 82, 87, 92, 84, 76, 73, 69, 68, 66,
+        65, 64, 63, 61, 60, 58, 58, 79, 85, 91, 83, 76, 72, 68, 66, 64, 62, 61,
+        60, 58, 57, 56, 56, 77, 83, 89, 83, 76, 71, 66, 64, 61, 59, 58, 57, 55,
+        54, 53, 53, 74, 80, 86, 80, 75, 70, 65, 62, 59, 58, 56, 55, 53, 52, 51,
+        51, 71, 77, 83, 78, 73, 69, 64, 61, 58, 56, 54, 53, 51, 50, 49, 49, 67,
+        73, 79, 75, 71, 67, 63, 60, 57, 55, 53, 51, 50, 49, 48, 48, 64, 69, 75,
+        72, 68, 65, 61, 58, 55, 53, 51, 50, 48, 47, 46, 46, 61, 66, 71, 69, 66,
+        63, 60, 57, 54, 52, 50, 49, 47, 46, 45, 45, 58, 63, 68, 65, 63, 61, 58,
+        56, 53, 51, 49, 48, 46, 45, 44, 44, 58, 63, 68, 65, 63, 61, 58, 56, 53,
+        51, 49, 48, 46, 45, 44, 44,
+        /* Size 32 */
+        120, 127, 133, 140, 146, 131, 116, 101, 87, 86, 85, 84, 82, 81, 80, 79,
+        78, 76, 75, 73, 71, 70, 68, 66, 65, 63, 62, 60, 59, 59, 59, 59, 127,
+        129, 131, 134, 136, 123, 111, 99, 86, 86, 86, 85, 85, 84, 83, 82, 81,
+        80, 78, 76, 74, 73, 71, 69, 67, 66, 64, 63, 61, 61, 61, 61, 133, 131,
+        129, 127, 126, 116, 106, 96, 86, 86, 87, 87, 88, 87, 86, 85, 84, 83, 81,
+        79, 77, 76, 74, 72, 70, 69, 67, 65, 64, 64, 64, 64, 140, 134, 127, 121,
+        115, 108, 101, 93, 86, 87, 88, 89, 90, 89, 89, 88, 87, 86, 84, 82, 81,
+        79, 77, 75, 73, 71, 70, 68, 66, 66, 66, 66, 146, 136, 126, 115, 105,
+        100, 95, 90, 85, 87, 89, 91, 93, 92, 92, 91, 90, 89, 87, 85, 84, 82, 80,
+        78, 76, 74, 72, 70, 68, 68, 68, 68, 131, 123, 116, 108, 100, 96, 92, 87,
+        83, 84, 86, 87, 89, 88, 88, 88, 87, 86, 84, 83, 81, 79, 78, 76, 74, 72,
+        71, 69, 67, 67, 67, 67, 116, 111, 106, 101, 95, 92, 88, 84, 80, 81, 83,
+        84, 85, 85, 84, 84, 84, 83, 81, 80, 79, 77, 76, 74, 72, 71, 69, 68, 66,
+        66, 66, 66, 101, 99, 96, 93, 90, 87, 84, 81, 78, 78, 79, 80, 81, 81, 81,
+        81, 81, 80, 79, 78, 76, 75, 74, 72, 71, 69, 68, 67, 65, 65, 65, 65, 87,
+        86, 86, 86, 85, 83, 80, 78, 75, 75, 76, 77, 77, 77, 77, 77, 77, 76, 76,
+        75, 74, 73, 72, 70, 69, 68, 67, 65, 64, 64, 64, 64, 86, 86, 86, 87, 87,
+        84, 81, 78, 75, 75, 75, 75, 75, 75, 75, 75, 75, 74, 73, 73, 72, 71, 70,
+        69, 67, 66, 65, 64, 63, 63, 63, 63, 85, 86, 87, 88, 89, 86, 83, 79, 76,
+        75, 75, 74, 73, 73, 73, 73, 72, 72, 71, 70, 70, 69, 68, 67, 66, 65, 63,
+        62, 61, 61, 61, 61, 84, 85, 87, 89, 91, 87, 84, 80, 77, 75, 74, 73, 72,
+        71, 71, 70, 70, 69, 68, 68, 67, 66, 66, 65, 64, 63, 62, 61, 60, 60, 60,
+        60, 82, 85, 88, 90, 93, 89, 85, 81, 77, 75, 73, 72, 70, 69, 68, 68, 67,
+        67, 66, 65, 65, 64, 63, 63, 62, 61, 60, 59, 59, 59, 59, 59, 81, 84, 87,
+        89, 92, 88, 85, 81, 77, 75, 73, 71, 69, 68, 67, 67, 66, 65, 65, 64, 63,
+        63, 62, 61, 61, 60, 59, 58, 57, 57, 57, 57, 80, 83, 86, 89, 92, 88, 84,
+        81, 77, 75, 73, 71, 68, 67, 66, 65, 64, 64, 63, 62, 62, 61, 60, 60, 59,
+        58, 58, 57, 56, 56, 56, 56, 79, 82, 85, 88, 91, 88, 84, 81, 77, 75, 73,
+        70, 68, 67, 65, 64, 63, 62, 62, 61, 60, 59, 59, 58, 58, 57, 56, 56, 55,
+        55, 55, 55, 78, 81, 84, 87, 90, 87, 84, 81, 77, 75, 72, 70, 67, 66, 64,
+        63, 62, 61, 60, 59, 58, 58, 57, 57, 56, 55, 55, 54, 54, 54, 54, 54, 76,
+        80, 83, 86, 89, 86, 83, 80, 76, 74, 72, 69, 67, 65, 64, 62, 61, 60, 59,
+        58, 57, 57, 56, 56, 55, 54, 54, 53, 53, 53, 53, 53, 75, 78, 81, 84, 87,
+        84, 81, 79, 76, 73, 71, 68, 66, 65, 63, 62, 60, 59, 58, 57, 56, 56, 55,
+        55, 54, 53, 53, 52, 52, 52, 52, 52, 73, 76, 79, 82, 85, 83, 80, 78, 75,
+        73, 70, 68, 65, 64, 62, 61, 59, 58, 57, 56, 55, 55, 54, 54, 53, 52, 52,
+        51, 51, 51, 51, 51, 71, 74, 77, 81, 84, 81, 79, 76, 74, 72, 70, 67, 65,
+        63, 62, 60, 58, 57, 56, 55, 54, 54, 53, 52, 52, 51, 51, 50, 50, 50, 50,
+        50, 70, 73, 76, 79, 82, 79, 77, 75, 73, 71, 69, 66, 64, 63, 61, 59, 58,
+        57, 56, 55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 68, 71, 74,
+        77, 80, 78, 76, 74, 72, 70, 68, 66, 63, 62, 60, 59, 57, 56, 55, 54, 53,
+        52, 52, 51, 50, 50, 49, 49, 48, 48, 48, 48, 66, 69, 72, 75, 78, 76, 74,
+        72, 70, 69, 67, 65, 63, 61, 60, 58, 57, 56, 55, 54, 52, 52, 51, 50, 50,
+        49, 49, 48, 48, 48, 48, 48, 65, 67, 70, 73, 76, 74, 72, 71, 69, 67, 66,
+        64, 62, 61, 59, 58, 56, 55, 54, 53, 52, 51, 50, 50, 49, 48, 48, 47, 47,
+        47, 47, 47, 63, 66, 69, 71, 74, 72, 71, 69, 68, 66, 65, 63, 61, 60, 58,
+        57, 55, 54, 53, 52, 51, 51, 50, 49, 48, 48, 47, 47, 46, 46, 46, 46, 62,
+        64, 67, 70, 72, 71, 69, 68, 67, 65, 63, 62, 60, 59, 58, 56, 55, 54, 53,
+        52, 51, 50, 49, 49, 48, 47, 47, 46, 46, 46, 46, 46, 60, 63, 65, 68, 70,
+        69, 68, 67, 65, 64, 62, 61, 59, 58, 57, 56, 54, 53, 52, 51, 50, 50, 49,
+        48, 47, 47, 46, 46, 45, 45, 45, 45, 59, 61, 64, 66, 68, 67, 66, 65, 64,
+        63, 61, 60, 59, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 48, 47, 46, 46,
+        45, 45, 45, 45, 45, 59, 61, 64, 66, 68, 67, 66, 65, 64, 63, 61, 60, 59,
+        57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 48, 47, 46, 46, 45, 45, 45, 45,
+        45, 59, 61, 64, 66, 68, 67, 66, 65, 64, 63, 61, 60, 59, 57, 56, 55, 54,
+        53, 52, 51, 50, 49, 48, 48, 47, 46, 46, 45, 45, 45, 45, 45, 59, 61, 64,
+        66, 68, 67, 66, 65, 64, 63, 61, 60, 59, 57, 56, 55, 54, 53, 52, 51, 50,
+        49, 48, 48, 47, 46, 46, 45, 45, 45, 45, 45 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 60, 41, 31, 60, 44, 35, 29, 41, 35, 29, 26, 31, 29, 26, 24,
+        /* Size 8 */
+        64, 80, 75, 61, 49, 41, 36, 33, 80, 72, 74, 66, 55, 46, 40, 35, 75, 74,
+        57, 51, 46, 41, 37, 34, 61, 66, 51, 43, 39, 36, 34, 32, 49, 55, 46, 39,
+        36, 33, 32, 30, 41, 46, 41, 36, 33, 31, 30, 29, 36, 40, 37, 34, 32, 30,
+        29, 28, 33, 35, 34, 32, 30, 29, 28, 27,
+        /* Size 16 */
+        64, 72, 80, 78, 75, 68, 61, 55, 49, 45, 41, 39, 36, 34, 33, 33, 72, 74,
+        76, 75, 75, 69, 63, 57, 52, 48, 43, 41, 38, 36, 34, 34, 80, 76, 72, 73,
+        74, 70, 66, 60, 55, 50, 46, 43, 40, 37, 35, 35, 78, 75, 73, 69, 66, 62,
+        58, 54, 50, 47, 43, 41, 38, 36, 34, 34, 75, 75, 74, 66, 57, 54, 51, 49,
+        46, 43, 41, 39, 37, 35, 34, 34, 68, 69, 70, 62, 54, 51, 47, 45, 43, 41,
+        39, 37, 35, 34, 33, 33, 61, 63, 66, 58, 51, 47, 43, 41, 39, 38, 36, 35,
+        34, 33, 32, 32, 55, 57, 60, 54, 49, 45, 41, 39, 37, 36, 35, 34, 33, 32,
+        31, 31, 49, 52, 55, 50, 46, 43, 39, 37, 36, 34, 33, 32, 32, 31, 30, 30,
+        45, 48, 50, 47, 43, 41, 38, 36, 34, 33, 32, 31, 31, 30, 30, 30, 41, 43,
+        46, 43, 41, 39, 36, 35, 33, 32, 31, 31, 30, 29, 29, 29, 39, 41, 43, 41,
+        39, 37, 35, 34, 32, 31, 31, 30, 29, 29, 28, 28, 36, 38, 40, 38, 37, 35,
+        34, 33, 32, 31, 30, 29, 29, 28, 28, 28, 34, 36, 37, 36, 35, 34, 33, 32,
+        31, 30, 29, 29, 28, 28, 28, 28, 33, 34, 35, 34, 34, 33, 32, 31, 30, 30,
+        29, 28, 28, 28, 27, 27, 33, 34, 35, 34, 34, 33, 32, 31, 30, 30, 29, 28,
+        28, 28, 27, 27,
+        /* Size 32 */
+        64, 68, 72, 76, 80, 79, 78, 77, 75, 72, 68, 64, 61, 58, 55, 52, 49, 47,
+        45, 43, 41, 40, 39, 37, 36, 35, 34, 33, 33, 33, 33, 33, 68, 71, 73, 76,
+        78, 77, 77, 76, 75, 72, 69, 65, 62, 59, 56, 53, 50, 48, 46, 44, 42, 41,
+        40, 38, 37, 36, 35, 34, 33, 33, 33, 33, 72, 73, 74, 75, 76, 76, 75, 75,
+        75, 72, 69, 66, 63, 60, 57, 55, 52, 50, 48, 46, 43, 42, 41, 39, 38, 37,
+        36, 35, 34, 34, 34, 34, 76, 76, 75, 75, 74, 74, 74, 74, 75, 72, 70, 67,
+        64, 62, 59, 56, 53, 51, 49, 47, 45, 43, 42, 40, 39, 38, 37, 36, 35, 35,
+        35, 35, 80, 78, 76, 74, 72, 72, 73, 74, 74, 72, 70, 68, 66, 63, 60, 57,
+        55, 52, 50, 48, 46, 44, 43, 41, 40, 38, 37, 36, 35, 35, 35, 35, 79, 77,
+        76, 74, 72, 72, 71, 71, 70, 68, 66, 64, 62, 60, 57, 55, 52, 50, 48, 47,
+        45, 43, 42, 40, 39, 38, 37, 36, 35, 35, 35, 35, 78, 77, 75, 74, 73, 71,
+        69, 68, 66, 64, 62, 60, 58, 56, 54, 52, 50, 49, 47, 45, 43, 42, 41, 39,
+        38, 37, 36, 35, 34, 34, 34, 34, 77, 76, 75, 74, 74, 71, 68, 65, 62, 60,
+        58, 56, 55, 53, 51, 50, 48, 47, 45, 44, 42, 41, 40, 39, 38, 37, 36, 35,
+        34, 34, 34, 34, 75, 75, 75, 75, 74, 70, 66, 62, 57, 56, 54, 53, 51, 50,
+        49, 47, 46, 45, 43, 42, 41, 40, 39, 38, 37, 36, 35, 35, 34, 34, 34, 34,
+        72, 72, 72, 72, 72, 68, 64, 60, 56, 54, 53, 51, 49, 48, 47, 46, 44, 43,
+        42, 41, 40, 39, 38, 37, 36, 35, 35, 34, 33, 33, 33, 33, 68, 69, 69, 70,
+        70, 66, 62, 58, 54, 53, 51, 49, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38,
+        37, 36, 35, 35, 34, 33, 33, 33, 33, 33, 64, 65, 66, 67, 68, 64, 60, 56,
+        53, 51, 49, 47, 45, 44, 43, 42, 41, 40, 39, 38, 38, 37, 36, 35, 35, 34,
+        33, 33, 32, 32, 32, 32, 61, 62, 63, 64, 66, 62, 58, 55, 51, 49, 47, 45,
+        43, 42, 41, 40, 39, 39, 38, 37, 36, 36, 35, 35, 34, 33, 33, 32, 32, 32,
+        32, 32, 58, 59, 60, 62, 63, 60, 56, 53, 50, 48, 46, 44, 42, 41, 40, 39,
+        38, 38, 37, 36, 36, 35, 34, 34, 33, 33, 32, 32, 31, 31, 31, 31, 55, 56,
+        57, 59, 60, 57, 54, 51, 49, 47, 45, 43, 41, 40, 39, 38, 37, 37, 36, 35,
+        35, 34, 34, 33, 33, 32, 32, 31, 31, 31, 31, 31, 52, 53, 55, 56, 57, 55,
+        52, 50, 47, 46, 44, 42, 40, 39, 38, 37, 37, 36, 35, 35, 34, 34, 33, 33,
+        32, 32, 31, 31, 31, 31, 31, 31, 49, 50, 52, 53, 55, 52, 50, 48, 46, 44,
+        43, 41, 39, 38, 37, 37, 36, 35, 34, 34, 33, 33, 32, 32, 32, 31, 31, 31,
+        30, 30, 30, 30, 47, 48, 50, 51, 52, 50, 49, 47, 45, 43, 42, 40, 39, 38,
+        37, 36, 35, 34, 34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30,
+        45, 46, 48, 49, 50, 48, 47, 45, 43, 42, 41, 39, 38, 37, 36, 35, 34, 34,
+        33, 33, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 43, 44, 46, 47,
+        48, 47, 45, 44, 42, 41, 40, 38, 37, 36, 35, 35, 34, 33, 33, 32, 32, 31,
+        31, 31, 30, 30, 30, 29, 29, 29, 29, 29, 41, 42, 43, 45, 46, 45, 43, 42,
+        41, 40, 39, 38, 36, 36, 35, 34, 33, 33, 32, 32, 31, 31, 31, 30, 30, 30,
+        29, 29, 29, 29, 29, 29, 40, 41, 42, 43, 44, 43, 42, 41, 40, 39, 38, 37,
+        36, 35, 34, 34, 33, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 29, 29,
+        29, 29, 39, 40, 41, 42, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 34, 33,
+        32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 37, 38,
+        39, 40, 41, 40, 39, 39, 38, 37, 36, 35, 35, 34, 33, 33, 32, 32, 31, 31,
+        30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 36, 37, 38, 39, 40, 39,
+        38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29,
+        29, 29, 28, 28, 28, 28, 28, 28, 35, 36, 37, 38, 38, 38, 37, 37, 36, 35,
+        35, 34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28,
+        28, 28, 28, 28, 34, 35, 36, 37, 37, 37, 36, 36, 35, 35, 34, 33, 33, 32,
+        32, 31, 31, 31, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 28, 28,
+        33, 34, 35, 36, 36, 36, 35, 35, 35, 34, 33, 33, 32, 32, 31, 31, 31, 30,
+        30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 33, 33, 34, 35,
+        35, 35, 34, 34, 34, 33, 33, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29,
+        28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 33, 33, 34, 35, 35, 35, 34, 34,
+        34, 33, 33, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28,
+        28, 27, 27, 27, 27, 27, 33, 33, 34, 35, 35, 35, 34, 34, 34, 33, 33, 32,
+        32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27,
+        27, 27, 33, 33, 34, 35, 35, 35, 34, 34, 34, 33, 33, 32, 32, 31, 31, 31,
+        30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27 },
+      { /* Intra matrices */
+        /* Size 4 */
+        129, 119, 81, 60, 119, 86, 68, 56, 81, 68, 55, 49, 60, 56, 49, 45,
+        /* Size 8 */
+        111, 141, 132, 105, 84, 70, 60, 54, 141, 125, 130, 114, 94, 78, 67, 59,
+        132, 130, 99, 88, 78, 69, 62, 56, 105, 114, 88, 74, 66, 61, 56, 53, 84,
+        94, 78, 66, 59, 55, 52, 50, 70, 78, 69, 61, 55, 52, 49, 47, 60, 67, 62,
+        56, 52, 49, 47, 46, 54, 59, 56, 53, 50, 47, 46, 44,
+        /* Size 16 */
+        114, 129, 144, 140, 135, 122, 108, 97, 86, 79, 71, 67, 62, 59, 55, 55,
+        129, 133, 137, 135, 134, 123, 113, 102, 91, 84, 76, 70, 65, 62, 58, 58,
+        144, 137, 129, 131, 134, 125, 117, 107, 96, 88, 80, 74, 68, 64, 60, 60,
+        140, 135, 131, 124, 118, 111, 104, 96, 88, 82, 76, 71, 66, 62, 59, 59,
+        135, 134, 134, 118, 102, 96, 90, 85, 80, 76, 71, 67, 63, 60, 58, 58,
+        122, 123, 125, 111, 96, 89, 83, 79, 74, 71, 67, 64, 61, 58, 56, 56, 108,
+        113, 117, 104, 90, 83, 76, 72, 68, 65, 63, 60, 58, 56, 54, 54, 97, 102,
+        107, 96, 85, 79, 72, 68, 65, 62, 60, 58, 56, 54, 53, 53, 86, 91, 96, 88,
+        80, 74, 68, 65, 61, 59, 57, 55, 54, 52, 51, 51, 79, 84, 88, 82, 76, 71,
+        65, 62, 59, 57, 55, 53, 52, 51, 50, 50, 71, 76, 80, 76, 71, 67, 63, 60,
+        57, 55, 53, 52, 50, 50, 49, 49, 67, 70, 74, 71, 67, 64, 60, 58, 55, 53,
+        52, 51, 49, 49, 48, 48, 62, 65, 68, 66, 63, 61, 58, 56, 54, 52, 50, 49,
+        48, 48, 47, 47, 59, 62, 64, 62, 60, 58, 56, 54, 52, 51, 50, 49, 48, 47,
+        46, 46, 55, 58, 60, 59, 58, 56, 54, 53, 51, 50, 49, 48, 47, 46, 46, 46,
+        55, 58, 60, 59, 58, 56, 54, 53, 51, 50, 49, 48, 47, 46, 46, 46,
+        /* Size 32 */
+        116, 123, 131, 139, 146, 144, 142, 139, 137, 130, 123, 116, 109, 104,
+        98, 93, 87, 84, 80, 76, 72, 70, 68, 65, 63, 61, 59, 58, 56, 56, 56, 56,
+        123, 128, 133, 138, 142, 141, 140, 138, 137, 130, 124, 118, 112, 106,
+        101, 95, 90, 86, 82, 78, 75, 72, 69, 67, 64, 63, 61, 59, 57, 57, 57, 57,
+        131, 133, 135, 137, 138, 138, 137, 137, 136, 131, 125, 120, 114, 109,
+        103, 98, 93, 89, 85, 81, 77, 74, 71, 69, 66, 64, 62, 61, 59, 59, 59, 59,
+        139, 138, 137, 136, 134, 135, 135, 135, 136, 131, 126, 121, 116, 111,
+        106, 100, 95, 91, 87, 83, 79, 76, 73, 71, 68, 66, 64, 62, 60, 60, 60,
+        60, 146, 142, 138, 134, 130, 132, 133, 134, 135, 131, 127, 123, 119,
+        113, 108, 103, 98, 94, 89, 85, 81, 78, 75, 72, 69, 67, 65, 63, 61, 61,
+        61, 61, 144, 141, 138, 135, 132, 131, 129, 128, 127, 123, 120, 116, 112,
+        107, 103, 98, 94, 90, 86, 83, 79, 76, 73, 71, 68, 66, 64, 62, 61, 61,
+        61, 61, 142, 140, 137, 135, 133, 129, 126, 123, 119, 116, 112, 109, 105,
+        101, 97, 93, 90, 86, 83, 80, 77, 74, 72, 69, 67, 65, 63, 62, 60, 60, 60,
+        60, 139, 138, 137, 135, 134, 128, 123, 117, 111, 108, 105, 101, 98, 95,
+        92, 89, 86, 83, 80, 77, 74, 72, 70, 68, 66, 64, 62, 61, 59, 59, 59, 59,
+        137, 137, 136, 136, 135, 127, 119, 111, 103, 100, 97, 94, 91, 89, 86,
+        84, 81, 79, 77, 74, 72, 70, 68, 66, 64, 63, 61, 60, 58, 58, 58, 58, 130,
+        130, 131, 131, 131, 123, 116, 108, 100, 97, 94, 91, 88, 85, 83, 81, 78,
+        76, 74, 72, 70, 68, 66, 65, 63, 62, 60, 59, 57, 57, 57, 57, 123, 124,
+        125, 126, 127, 120, 112, 105, 97, 94, 91, 87, 84, 82, 80, 77, 75, 73,
+        71, 70, 68, 66, 65, 63, 61, 60, 59, 58, 57, 57, 57, 57, 116, 118, 120,
+        121, 123, 116, 109, 101, 94, 91, 87, 84, 80, 78, 76, 74, 72, 71, 69, 67,
+        66, 64, 63, 61, 60, 59, 58, 57, 56, 56, 56, 56, 109, 112, 114, 116, 119,
+        112, 105, 98, 91, 88, 84, 80, 77, 75, 73, 71, 69, 68, 66, 65, 63, 62,
+        61, 60, 59, 58, 57, 56, 55, 55, 55, 55, 104, 106, 109, 111, 113, 107,
+        101, 95, 89, 85, 82, 78, 75, 73, 71, 69, 67, 66, 65, 63, 62, 61, 60, 59,
+        58, 57, 56, 55, 54, 54, 54, 54, 98, 101, 103, 106, 108, 103, 97, 92, 86,
+        83, 80, 76, 73, 71, 69, 67, 65, 64, 63, 62, 60, 59, 58, 57, 56, 56, 55,
+        54, 53, 53, 53, 53, 93, 95, 98, 100, 103, 98, 93, 89, 84, 81, 77, 74,
+        71, 69, 67, 65, 64, 62, 61, 60, 59, 58, 57, 56, 55, 55, 54, 53, 52, 52,
+        52, 52, 87, 90, 93, 95, 98, 94, 90, 86, 81, 78, 75, 72, 69, 67, 65, 64,
+        62, 61, 60, 59, 57, 57, 56, 55, 54, 54, 53, 52, 52, 52, 52, 52, 84, 86,
+        89, 91, 94, 90, 86, 83, 79, 76, 73, 71, 68, 66, 64, 62, 61, 60, 59, 58,
+        56, 56, 55, 54, 53, 53, 52, 52, 51, 51, 51, 51, 80, 82, 85, 87, 89, 86,
+        83, 80, 77, 74, 71, 69, 66, 65, 63, 61, 60, 59, 58, 57, 56, 55, 54, 53,
+        53, 52, 52, 51, 50, 50, 50, 50, 76, 78, 81, 83, 85, 83, 80, 77, 74, 72,
+        70, 67, 65, 63, 62, 60, 59, 58, 57, 56, 55, 54, 53, 53, 52, 51, 51, 50,
+        50, 50, 50, 50, 72, 75, 77, 79, 81, 79, 77, 74, 72, 70, 68, 66, 63, 62,
+        60, 59, 57, 56, 56, 55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49,
+        70, 72, 74, 76, 78, 76, 74, 72, 70, 68, 66, 64, 62, 61, 59, 58, 57, 56,
+        55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 49, 68, 69, 71, 73,
+        75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 57, 56, 55, 54, 53, 52, 52,
+        51, 51, 50, 50, 49, 49, 48, 48, 48, 48, 65, 67, 69, 71, 72, 71, 69, 68,
+        66, 65, 63, 61, 60, 59, 57, 56, 55, 54, 53, 53, 52, 51, 51, 50, 50, 49,
+        49, 48, 48, 48, 48, 48, 63, 64, 66, 68, 69, 68, 67, 66, 64, 63, 61, 60,
+        59, 58, 56, 55, 54, 53, 53, 52, 51, 51, 50, 50, 49, 49, 48, 48, 47, 47,
+        47, 47, 61, 63, 64, 66, 67, 66, 65, 64, 63, 62, 60, 59, 58, 57, 56, 55,
+        54, 53, 52, 51, 51, 50, 50, 49, 49, 48, 48, 48, 47, 47, 47, 47, 59, 61,
+        62, 64, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 52, 51,
+        50, 50, 49, 49, 48, 48, 48, 47, 47, 47, 47, 47, 58, 59, 61, 62, 63, 62,
+        62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49, 49, 48,
+        48, 48, 47, 47, 47, 47, 47, 47, 56, 57, 59, 60, 61, 61, 60, 59, 58, 57,
+        57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47, 47, 47,
+        46, 46, 46, 46, 56, 57, 59, 60, 61, 61, 60, 59, 58, 57, 57, 56, 55, 54,
+        53, 52, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46,
+        56, 57, 59, 60, 61, 61, 60, 59, 58, 57, 57, 56, 55, 54, 53, 52, 52, 51,
+        50, 50, 49, 49, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46, 56, 57, 59, 60,
+        61, 61, 60, 59, 58, 57, 57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49, 49,
+        48, 48, 47, 47, 47, 47, 46, 46, 46, 46 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 45, 42, 36, 45, 40, 38, 35, 42, 38, 32, 30, 36, 35, 30, 27,
+        /* Size 8 */
+        64, 76, 48, 46, 44, 41, 37, 35, 76, 57, 47, 51, 50, 46, 43, 39, 48, 47,
+        42, 43, 43, 42, 40, 37, 46, 51, 43, 40, 39, 38, 36, 35, 44, 50, 43, 39,
+        36, 34, 33, 32, 41, 46, 42, 38, 34, 33, 31, 30, 37, 43, 40, 36, 33, 31,
+        30, 29, 35, 39, 37, 35, 32, 30, 29, 28,
+        /* Size 16 */
+        64, 70, 76, 62, 48, 47, 46, 45, 44, 42, 41, 39, 37, 36, 35, 35, 70, 68,
+        66, 57, 48, 48, 48, 48, 47, 45, 44, 42, 40, 39, 37, 37, 76, 66, 57, 52,
+        47, 49, 51, 50, 50, 48, 46, 45, 43, 41, 39, 39, 62, 57, 52, 48, 45, 46,
+        47, 47, 47, 45, 44, 43, 41, 40, 38, 38, 48, 48, 47, 45, 42, 43, 43, 43,
+        43, 43, 42, 41, 40, 38, 37, 37, 47, 48, 49, 46, 43, 42, 42, 41, 41, 40,
+        40, 39, 38, 37, 36, 36, 46, 48, 51, 47, 43, 42, 40, 39, 39, 38, 38, 37,
+        36, 35, 35, 35, 45, 48, 50, 47, 43, 41, 39, 38, 37, 37, 36, 35, 35, 34,
+        33, 33, 44, 47, 50, 47, 43, 41, 39, 37, 36, 35, 34, 34, 33, 33, 32, 32,
+        42, 45, 48, 45, 43, 40, 38, 37, 35, 34, 34, 33, 32, 32, 31, 31, 41, 44,
+        46, 44, 42, 40, 38, 36, 34, 34, 33, 32, 31, 31, 30, 30, 39, 42, 45, 43,
+        41, 39, 37, 35, 34, 33, 32, 31, 31, 30, 30, 30, 37, 40, 43, 41, 40, 38,
+        36, 35, 33, 32, 31, 31, 30, 29, 29, 29, 36, 39, 41, 40, 38, 37, 35, 34,
+        33, 32, 31, 30, 29, 29, 28, 28, 35, 37, 39, 38, 37, 36, 35, 33, 32, 31,
+        30, 30, 29, 28, 28, 28, 35, 37, 39, 38, 37, 36, 35, 33, 32, 31, 30, 30,
+        29, 28, 28, 28,
+        /* Size 32 */
+        64, 67, 70, 73, 76, 69, 62, 55, 48, 47, 47, 46, 46, 45, 45, 44, 44, 43,
+        42, 41, 41, 40, 39, 38, 37, 37, 36, 35, 35, 35, 35, 35, 67, 68, 69, 70,
+        71, 65, 60, 54, 48, 48, 47, 47, 47, 47, 46, 46, 45, 44, 44, 43, 42, 41,
+        40, 40, 39, 38, 37, 37, 36, 36, 36, 36, 70, 69, 68, 67, 66, 62, 57, 52,
+        48, 48, 48, 48, 48, 48, 48, 47, 47, 46, 45, 44, 44, 43, 42, 41, 40, 39,
+        39, 38, 37, 37, 37, 37, 73, 70, 67, 65, 62, 58, 55, 51, 47, 48, 49, 49,
+        50, 49, 49, 49, 48, 47, 47, 46, 45, 44, 43, 42, 41, 41, 40, 39, 38, 38,
+        38, 38, 76, 71, 66, 62, 57, 54, 52, 50, 47, 48, 49, 50, 51, 51, 50, 50,
+        50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 41, 40, 39, 39, 39, 39, 69, 65,
+        62, 58, 54, 52, 50, 48, 46, 47, 47, 48, 49, 49, 49, 48, 48, 47, 47, 46,
+        45, 44, 44, 43, 42, 41, 40, 39, 39, 39, 39, 39, 62, 60, 57, 55, 52, 50,
+        48, 47, 45, 45, 46, 46, 47, 47, 47, 47, 47, 46, 45, 45, 44, 43, 43, 42,
+        41, 40, 40, 39, 38, 38, 38, 38, 55, 54, 52, 51, 50, 48, 47, 45, 44, 44,
+        44, 45, 45, 45, 45, 45, 45, 45, 44, 44, 43, 42, 42, 41, 40, 40, 39, 38,
+        38, 38, 38, 38, 48, 48, 48, 47, 47, 46, 45, 44, 42, 43, 43, 43, 43, 43,
+        43, 43, 43, 43, 43, 42, 42, 41, 41, 40, 40, 39, 38, 38, 37, 37, 37, 37,
+        47, 48, 48, 48, 48, 47, 45, 44, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42,
+        42, 41, 41, 40, 40, 39, 39, 38, 38, 37, 36, 36, 36, 36, 47, 47, 48, 49,
+        49, 47, 46, 44, 43, 43, 42, 42, 42, 41, 41, 41, 41, 41, 40, 40, 40, 39,
+        39, 38, 38, 37, 37, 36, 36, 36, 36, 36, 46, 47, 48, 49, 50, 48, 46, 45,
+        43, 42, 42, 41, 41, 40, 40, 40, 40, 40, 39, 39, 39, 38, 38, 37, 37, 37,
+        36, 36, 35, 35, 35, 35, 46, 47, 48, 50, 51, 49, 47, 45, 43, 42, 42, 41,
+        40, 40, 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 36, 36, 35, 35, 35, 35,
+        35, 35, 45, 47, 48, 49, 51, 49, 47, 45, 43, 42, 41, 40, 40, 39, 39, 38,
+        38, 38, 37, 37, 37, 36, 36, 36, 35, 35, 35, 34, 34, 34, 34, 34, 45, 46,
+        48, 49, 50, 49, 47, 45, 43, 42, 41, 40, 39, 39, 38, 38, 37, 37, 37, 36,
+        36, 36, 35, 35, 35, 34, 34, 34, 33, 33, 33, 33, 44, 46, 47, 49, 50, 48,
+        47, 45, 43, 42, 41, 40, 39, 38, 38, 37, 37, 36, 36, 36, 35, 35, 35, 34,
+        34, 34, 33, 33, 33, 33, 33, 33, 44, 45, 47, 48, 50, 48, 47, 45, 43, 42,
+        41, 40, 39, 38, 37, 37, 36, 36, 35, 35, 34, 34, 34, 34, 33, 33, 33, 32,
+        32, 32, 32, 32, 43, 44, 46, 47, 49, 47, 46, 45, 43, 42, 41, 40, 38, 38,
+        37, 36, 36, 35, 35, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32,
+        42, 44, 45, 47, 48, 47, 45, 44, 43, 42, 40, 39, 38, 37, 37, 36, 35, 35,
+        34, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 41, 43, 44, 46,
+        47, 46, 45, 44, 42, 41, 40, 39, 38, 37, 36, 36, 35, 34, 34, 33, 33, 33,
+        32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 41, 42, 44, 45, 46, 45, 44, 43,
+        42, 41, 40, 39, 38, 37, 36, 35, 34, 34, 34, 33, 33, 32, 32, 32, 31, 31,
+        31, 31, 30, 30, 30, 30, 40, 41, 43, 44, 46, 44, 43, 42, 41, 40, 39, 38,
+        37, 36, 36, 35, 34, 34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30,
+        30, 30, 39, 40, 42, 43, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 35,
+        34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 38, 40,
+        41, 42, 44, 43, 42, 41, 40, 39, 38, 37, 37, 36, 35, 34, 34, 33, 33, 32,
+        32, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 37, 39, 40, 41, 43, 42,
+        41, 40, 40, 39, 38, 37, 36, 35, 35, 34, 33, 33, 32, 32, 31, 31, 31, 30,
+        30, 30, 29, 29, 29, 29, 29, 29, 37, 38, 39, 41, 42, 41, 40, 40, 39, 38,
+        37, 37, 36, 35, 34, 34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29,
+        29, 29, 29, 29, 36, 37, 39, 40, 41, 40, 40, 39, 38, 38, 37, 36, 35, 35,
+        34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28,
+        35, 37, 38, 39, 40, 39, 39, 38, 38, 37, 36, 36, 35, 34, 34, 33, 32, 32,
+        32, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 35, 36, 37, 38,
+        39, 39, 38, 38, 37, 36, 36, 35, 35, 34, 33, 33, 32, 32, 31, 31, 30, 30,
+        30, 29, 29, 29, 28, 28, 28, 28, 28, 28, 35, 36, 37, 38, 39, 39, 38, 38,
+        37, 36, 36, 35, 35, 34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29,
+        28, 28, 28, 28, 28, 28, 35, 36, 37, 38, 39, 39, 38, 38, 37, 36, 36, 35,
+        35, 34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28,
+        28, 28, 35, 36, 37, 38, 39, 39, 38, 38, 37, 36, 36, 35, 35, 34, 33, 33,
+        32, 32, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 28 },
+      { /* Intra matrices */
+        /* Size 4 */
+        117, 81, 76, 64, 81, 71, 67, 61, 76, 67, 57, 52, 64, 61, 52, 47,
+        /* Size 8 */
+        110, 131, 81, 77, 74, 68, 62, 57, 131, 97, 80, 86, 84, 78, 72, 65, 81,
+        80, 71, 73, 73, 70, 66, 62, 77, 86, 73, 67, 64, 63, 60, 57, 74, 84, 73,
+        64, 60, 57, 55, 53, 68, 78, 70, 63, 57, 54, 51, 50, 62, 72, 66, 60, 55,
+        51, 49, 47, 57, 65, 62, 57, 53, 50, 47, 45,
+        /* Size 16 */
+        112, 123, 134, 108, 83, 81, 79, 77, 75, 72, 69, 66, 64, 61, 59, 59, 123,
+        120, 116, 99, 82, 83, 83, 82, 81, 78, 75, 71, 68, 65, 63, 63, 134, 116,
+        99, 90, 81, 85, 88, 87, 86, 83, 80, 77, 73, 70, 67, 67, 108, 99, 90, 84,
+        77, 79, 81, 81, 80, 78, 76, 73, 70, 68, 65, 65, 83, 82, 81, 77, 72, 73,
+        74, 74, 74, 73, 72, 70, 67, 65, 63, 63, 81, 83, 85, 79, 73, 72, 71, 71,
+        70, 69, 68, 66, 64, 62, 61, 61, 79, 83, 88, 81, 74, 71, 68, 67, 66, 65,
+        64, 62, 61, 60, 58, 58, 77, 82, 87, 81, 74, 71, 67, 65, 63, 62, 61, 60,
+        59, 57, 56, 56, 75, 81, 86, 80, 74, 70, 66, 63, 61, 59, 58, 57, 56, 55,
+        54, 54, 72, 78, 83, 78, 73, 69, 65, 62, 59, 58, 56, 55, 54, 53, 52, 52,
+        69, 75, 80, 76, 72, 68, 64, 61, 58, 56, 55, 54, 52, 51, 51, 51, 66, 71,
+        77, 73, 70, 66, 62, 60, 57, 55, 54, 52, 51, 50, 49, 49, 64, 68, 73, 70,
+        67, 64, 61, 59, 56, 54, 52, 51, 50, 49, 48, 48, 61, 65, 70, 68, 65, 62,
+        60, 57, 55, 53, 51, 50, 49, 48, 47, 47, 59, 63, 67, 65, 63, 61, 58, 56,
+        54, 52, 51, 49, 48, 47, 46, 46, 59, 63, 67, 65, 63, 61, 58, 56, 54, 52,
+        51, 49, 48, 47, 46, 46,
+        /* Size 32 */
+        113, 119, 124, 130, 135, 122, 109, 96, 83, 82, 82, 81, 80, 79, 78, 77,
+        76, 74, 73, 71, 70, 69, 67, 66, 64, 63, 62, 60, 59, 59, 59, 59, 119,
+        121, 123, 124, 126, 116, 105, 94, 83, 83, 83, 82, 82, 81, 80, 79, 79,
+        77, 76, 74, 73, 71, 70, 68, 67, 65, 64, 63, 61, 61, 61, 61, 124, 123,
+        121, 119, 118, 109, 100, 91, 83, 83, 84, 84, 84, 84, 83, 82, 81, 80, 78,
+        77, 75, 74, 72, 71, 69, 68, 66, 65, 63, 63, 63, 63, 130, 124, 119, 114,
+        109, 102, 96, 89, 83, 84, 84, 85, 86, 86, 85, 85, 84, 83, 81, 80, 78,
+        76, 75, 73, 71, 70, 68, 67, 65, 65, 65, 65, 135, 126, 118, 109, 100, 95,
+        91, 87, 82, 84, 85, 87, 89, 88, 88, 87, 87, 85, 84, 82, 81, 79, 77, 76,
+        74, 72, 71, 69, 67, 67, 67, 67, 122, 116, 109, 102, 95, 92, 88, 84, 80,
+        81, 83, 84, 85, 85, 85, 84, 84, 83, 81, 80, 79, 77, 76, 74, 72, 71, 69,
+        68, 66, 66, 66, 66, 109, 105, 100, 96, 91, 88, 84, 81, 78, 79, 80, 81,
+        82, 82, 81, 81, 81, 80, 79, 78, 77, 75, 74, 72, 71, 70, 68, 67, 65, 65,
+        65, 65, 96, 94, 91, 89, 87, 84, 81, 78, 75, 76, 77, 78, 78, 78, 78, 78,
+        78, 77, 76, 75, 74, 73, 72, 71, 70, 68, 67, 66, 65, 65, 65, 65, 83, 83,
+        83, 83, 82, 80, 78, 75, 73, 74, 74, 75, 75, 75, 75, 75, 75, 74, 74, 73,
+        72, 71, 70, 69, 68, 67, 66, 65, 64, 64, 64, 64, 82, 83, 83, 84, 84, 81,
+        79, 76, 74, 74, 73, 73, 73, 73, 73, 73, 73, 72, 72, 71, 70, 69, 68, 68,
+        67, 66, 64, 63, 62, 62, 62, 62, 82, 83, 84, 84, 85, 83, 80, 77, 74, 73,
+        73, 72, 72, 72, 71, 71, 71, 70, 70, 69, 68, 68, 67, 66, 65, 64, 63, 62,
+        61, 61, 61, 61, 81, 82, 84, 85, 87, 84, 81, 78, 75, 73, 72, 71, 70, 70,
+        69, 69, 69, 68, 67, 67, 66, 66, 65, 64, 63, 63, 62, 61, 60, 60, 60, 60,
+        80, 82, 84, 86, 89, 85, 82, 78, 75, 73, 72, 70, 69, 68, 67, 67, 66, 66,
+        65, 65, 64, 64, 63, 62, 62, 61, 60, 60, 59, 59, 59, 59, 79, 81, 84, 86,
+        88, 85, 82, 78, 75, 73, 72, 70, 68, 67, 67, 66, 65, 65, 64, 63, 63, 62,
+        62, 61, 60, 60, 59, 58, 58, 58, 58, 58, 78, 80, 83, 85, 88, 85, 81, 78,
+        75, 73, 71, 69, 67, 67, 66, 65, 64, 63, 63, 62, 62, 61, 60, 60, 59, 59,
+        58, 57, 57, 57, 57, 57, 77, 79, 82, 85, 87, 84, 81, 78, 75, 73, 71, 69,
+        67, 66, 65, 64, 63, 62, 61, 61, 60, 60, 59, 58, 58, 57, 57, 56, 56, 56,
+        56, 56, 76, 79, 81, 84, 87, 84, 81, 78, 75, 73, 71, 69, 66, 65, 64, 63,
+        61, 61, 60, 59, 59, 58, 58, 57, 57, 56, 56, 55, 54, 54, 54, 54, 74, 77,
+        80, 83, 85, 83, 80, 77, 74, 72, 70, 68, 66, 65, 63, 62, 61, 60, 59, 59,
+        58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 54, 54, 73, 76, 78, 81, 84, 81,
+        79, 76, 74, 72, 70, 67, 65, 64, 63, 61, 60, 59, 59, 58, 57, 56, 56, 55,
+        55, 54, 54, 53, 53, 53, 53, 53, 71, 74, 77, 80, 82, 80, 78, 75, 73, 71,
+        69, 67, 65, 63, 62, 61, 59, 59, 58, 57, 56, 56, 55, 54, 54, 53, 53, 52,
+        52, 52, 52, 52, 70, 73, 75, 78, 81, 79, 77, 74, 72, 70, 68, 66, 64, 63,
+        62, 60, 59, 58, 57, 56, 55, 55, 54, 53, 53, 52, 52, 51, 51, 51, 51, 51,
+        69, 71, 74, 76, 79, 77, 75, 73, 71, 69, 68, 66, 64, 62, 61, 60, 58, 57,
+        56, 56, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 50, 67, 70, 72, 75,
+        77, 76, 74, 72, 70, 68, 67, 65, 63, 62, 60, 59, 58, 57, 56, 55, 54, 53,
+        53, 52, 52, 51, 51, 50, 50, 50, 50, 50, 66, 68, 71, 73, 76, 74, 72, 71,
+        69, 68, 66, 64, 62, 61, 60, 58, 57, 56, 55, 54, 53, 53, 52, 52, 51, 51,
+        50, 50, 49, 49, 49, 49, 64, 67, 69, 71, 74, 72, 71, 70, 68, 67, 65, 63,
+        62, 60, 59, 58, 57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49, 49, 49, 49,
+        49, 49, 63, 65, 68, 70, 72, 71, 70, 68, 67, 66, 64, 63, 61, 60, 59, 57,
+        56, 55, 54, 53, 52, 52, 51, 51, 50, 49, 49, 49, 48, 48, 48, 48, 62, 64,
+        66, 68, 71, 69, 68, 67, 66, 64, 63, 62, 60, 59, 58, 57, 56, 55, 54, 53,
+        52, 51, 51, 50, 49, 49, 49, 48, 48, 48, 48, 48, 60, 63, 65, 67, 69, 68,
+        67, 66, 65, 63, 62, 61, 60, 58, 57, 56, 55, 54, 53, 52, 51, 51, 50, 50,
+        49, 49, 48, 48, 47, 47, 47, 47, 59, 61, 63, 65, 67, 66, 65, 65, 64, 62,
+        61, 60, 59, 58, 57, 56, 54, 54, 53, 52, 51, 50, 50, 49, 49, 48, 48, 47,
+        47, 47, 47, 47, 59, 61, 63, 65, 67, 66, 65, 65, 64, 62, 61, 60, 59, 58,
+        57, 56, 54, 54, 53, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47, 47, 47, 47,
+        59, 61, 63, 65, 67, 66, 65, 65, 64, 62, 61, 60, 59, 58, 57, 56, 54, 54,
+        53, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47, 47, 47, 47, 59, 61, 63, 65,
+        67, 66, 65, 65, 64, 62, 61, 60, 59, 58, 57, 56, 54, 54, 53, 52, 51, 50,
+        50, 49, 49, 48, 48, 47, 47, 47, 47, 47 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 60, 43, 33, 60, 45, 37, 32, 43, 37, 31, 29, 33, 32, 29, 27,
+        /* Size 8 */
+        64, 79, 74, 61, 50, 43, 38, 35, 79, 71, 73, 66, 55, 47, 42, 38, 74, 73,
+        58, 52, 48, 43, 39, 36, 61, 66, 52, 45, 42, 39, 37, 35, 50, 55, 48, 42,
+        38, 36, 34, 33, 43, 47, 43, 39, 36, 34, 33, 32, 38, 42, 39, 37, 34, 33,
+        32, 31, 35, 38, 36, 35, 33, 32, 31, 31,
+        /* Size 16 */
+        64, 71, 79, 77, 74, 68, 61, 56, 50, 47, 43, 41, 38, 37, 35, 35, 71, 73,
+        75, 74, 74, 69, 63, 58, 53, 49, 45, 43, 40, 38, 37, 37, 79, 75, 71, 72,
+        73, 70, 66, 60, 55, 51, 47, 45, 42, 40, 38, 38, 77, 74, 72, 69, 66, 62,
+        59, 55, 51, 48, 45, 43, 40, 39, 37, 37, 74, 74, 73, 66, 58, 55, 52, 50,
+        48, 45, 43, 41, 39, 38, 36, 36, 68, 69, 70, 62, 55, 52, 49, 47, 45, 43,
+        41, 39, 38, 37, 36, 36, 61, 63, 66, 59, 52, 49, 45, 43, 42, 40, 39, 38,
+        37, 36, 35, 35, 56, 58, 60, 55, 50, 47, 43, 42, 40, 39, 37, 36, 35, 35,
+        34, 34, 50, 53, 55, 51, 48, 45, 42, 40, 38, 37, 36, 35, 34, 34, 33, 33,
+        47, 49, 51, 48, 45, 43, 40, 39, 37, 36, 35, 34, 34, 33, 33, 33, 43, 45,
+        47, 45, 43, 41, 39, 37, 36, 35, 34, 33, 33, 32, 32, 32, 41, 43, 45, 43,
+        41, 39, 38, 36, 35, 34, 33, 33, 32, 32, 32, 32, 38, 40, 42, 40, 39, 38,
+        37, 35, 34, 34, 33, 32, 32, 31, 31, 31, 37, 38, 40, 39, 38, 37, 36, 35,
+        34, 33, 32, 32, 31, 31, 31, 31, 35, 37, 38, 37, 36, 36, 35, 34, 33, 33,
+        32, 32, 31, 31, 31, 31, 35, 37, 38, 37, 36, 36, 35, 34, 33, 33, 32, 32,
+        31, 31, 31, 31,
+        /* Size 32 */
+        64, 68, 71, 75, 79, 78, 77, 75, 74, 71, 68, 64, 61, 58, 56, 53, 50, 49,
+        47, 45, 43, 42, 41, 40, 38, 38, 37, 36, 35, 35, 35, 35, 68, 70, 72, 75,
+        77, 76, 76, 75, 74, 71, 68, 65, 62, 59, 57, 54, 52, 50, 48, 46, 44, 43,
+        42, 40, 39, 38, 38, 37, 36, 36, 36, 36, 71, 72, 73, 74, 75, 75, 74, 74,
+        74, 71, 69, 66, 63, 61, 58, 55, 53, 51, 49, 47, 45, 44, 43, 41, 40, 39,
+        38, 37, 37, 37, 37, 37, 75, 75, 74, 74, 73, 73, 73, 74, 74, 71, 69, 67,
+        64, 62, 59, 57, 54, 52, 50, 48, 46, 45, 44, 42, 41, 40, 39, 38, 37, 37,
+        37, 37, 79, 77, 75, 73, 71, 72, 72, 73, 73, 72, 70, 68, 66, 63, 60, 58,
+        55, 53, 51, 49, 47, 46, 45, 43, 42, 41, 40, 39, 38, 38, 38, 38, 78, 76,
+        75, 73, 72, 71, 71, 70, 70, 68, 66, 64, 62, 60, 58, 56, 53, 52, 50, 48,
+        46, 45, 44, 42, 41, 40, 39, 38, 37, 37, 37, 37, 77, 76, 74, 73, 72, 71,
+        69, 67, 66, 64, 62, 61, 59, 57, 55, 53, 51, 50, 48, 47, 45, 44, 43, 42,
+        40, 40, 39, 38, 37, 37, 37, 37, 75, 75, 74, 74, 73, 70, 67, 65, 62, 60,
+        59, 57, 56, 54, 53, 51, 49, 48, 47, 45, 44, 43, 42, 41, 40, 39, 38, 37,
+        37, 37, 37, 37, 74, 74, 74, 74, 73, 70, 66, 62, 58, 56, 55, 54, 52, 51,
+        50, 49, 48, 46, 45, 44, 43, 42, 41, 40, 39, 39, 38, 37, 36, 36, 36, 36,
+        71, 71, 71, 71, 72, 68, 64, 60, 56, 55, 54, 52, 51, 49, 48, 47, 46, 45,
+        44, 43, 42, 41, 40, 39, 39, 38, 37, 37, 36, 36, 36, 36, 68, 68, 69, 69,
+        70, 66, 62, 59, 55, 54, 52, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40,
+        39, 39, 38, 37, 37, 36, 36, 36, 36, 36, 64, 65, 66, 67, 68, 64, 61, 57,
+        54, 52, 50, 49, 47, 46, 45, 44, 43, 42, 41, 41, 40, 39, 39, 38, 37, 37,
+        36, 36, 35, 35, 35, 35, 61, 62, 63, 64, 66, 62, 59, 56, 52, 51, 49, 47,
+        45, 44, 43, 42, 42, 41, 40, 39, 39, 38, 38, 37, 37, 36, 36, 35, 35, 35,
+        35, 35, 58, 59, 61, 62, 63, 60, 57, 54, 51, 49, 48, 46, 44, 43, 42, 42,
+        41, 40, 39, 39, 38, 38, 37, 37, 36, 36, 35, 35, 34, 34, 34, 34, 56, 57,
+        58, 59, 60, 58, 55, 53, 50, 48, 47, 45, 43, 42, 42, 41, 40, 39, 39, 38,
+        37, 37, 36, 36, 35, 35, 35, 34, 34, 34, 34, 34, 53, 54, 55, 57, 58, 56,
+        53, 51, 49, 47, 46, 44, 42, 42, 41, 40, 39, 38, 38, 37, 37, 36, 36, 35,
+        35, 35, 34, 34, 34, 34, 34, 34, 50, 52, 53, 54, 55, 53, 51, 49, 48, 46,
+        45, 43, 42, 41, 40, 39, 38, 38, 37, 36, 36, 36, 35, 35, 34, 34, 34, 33,
+        33, 33, 33, 33, 49, 50, 51, 52, 53, 52, 50, 48, 46, 45, 44, 42, 41, 40,
+        39, 38, 38, 37, 36, 36, 35, 35, 35, 34, 34, 34, 33, 33, 33, 33, 33, 33,
+        47, 48, 49, 50, 51, 50, 48, 47, 45, 44, 43, 41, 40, 39, 39, 38, 37, 36,
+        36, 36, 35, 35, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 45, 46, 47, 48,
+        49, 48, 47, 45, 44, 43, 42, 41, 39, 39, 38, 37, 36, 36, 36, 35, 35, 34,
+        34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 43, 44, 45, 46, 47, 46, 45, 44,
+        43, 42, 41, 40, 39, 38, 37, 37, 36, 35, 35, 35, 34, 34, 33, 33, 33, 33,
+        32, 32, 32, 32, 32, 32, 42, 43, 44, 45, 46, 45, 44, 43, 42, 41, 40, 39,
+        38, 38, 37, 36, 36, 35, 35, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32,
+        32, 32, 41, 42, 43, 44, 45, 44, 43, 42, 41, 40, 39, 39, 38, 37, 36, 36,
+        35, 35, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40,
+        41, 42, 43, 42, 42, 41, 40, 39, 39, 38, 37, 37, 36, 35, 35, 34, 34, 34,
+        33, 33, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 38, 39, 40, 41, 42, 41,
+        40, 40, 39, 39, 38, 37, 37, 36, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32,
+        32, 32, 31, 31, 31, 31, 31, 31, 38, 38, 39, 40, 41, 40, 40, 39, 39, 38,
+        37, 37, 36, 36, 35, 35, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31,
+        31, 31, 31, 31, 37, 38, 38, 39, 40, 39, 39, 38, 38, 37, 37, 36, 36, 35,
+        35, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31,
+        36, 37, 37, 38, 39, 38, 38, 37, 37, 37, 36, 36, 35, 35, 34, 34, 33, 33,
+        33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 35, 36, 37, 37,
+        38, 37, 37, 37, 36, 36, 36, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32, 32,
+        32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 35, 36, 37, 37, 38, 37, 37, 37,
+        36, 36, 36, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31,
+        31, 31, 31, 31, 31, 31, 35, 36, 37, 37, 38, 37, 37, 37, 36, 36, 36, 35,
+        35, 34, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31,
+        31, 31, 35, 36, 37, 37, 38, 37, 37, 37, 36, 36, 36, 35, 35, 34, 34, 34,
+        33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31 },
+      { /* Intra matrices */
+        /* Size 4 */
+        118, 110, 77, 59, 110, 82, 67, 57, 77, 67, 56, 51, 59, 57, 51, 47,
+        /* Size 8 */
+        103, 128, 121, 98, 80, 68, 60, 55, 128, 115, 119, 106, 89, 75, 65, 59,
+        121, 119, 93, 83, 75, 68, 61, 56, 98, 106, 83, 71, 65, 61, 57, 54, 80,
+        89, 75, 65, 59, 56, 53, 51, 68, 75, 68, 61, 56, 53, 51, 49, 60, 65, 61,
+        57, 53, 51, 49, 48, 55, 59, 56, 54, 51, 49, 48, 47,
+        /* Size 16 */
+        106, 118, 131, 127, 123, 112, 100, 91, 82, 76, 69, 65, 61, 59, 56, 56,
+        118, 122, 125, 124, 123, 113, 104, 95, 86, 80, 73, 69, 64, 61, 58, 58,
+        131, 125, 118, 120, 122, 115, 108, 99, 91, 84, 77, 72, 67, 64, 60, 60,
+        127, 124, 120, 114, 108, 103, 97, 90, 84, 78, 73, 69, 65, 62, 59, 59,
+        123, 123, 122, 108, 95, 90, 85, 81, 77, 73, 69, 66, 63, 60, 58, 58, 112,
+        113, 115, 103, 90, 85, 79, 76, 72, 69, 66, 63, 60, 58, 56, 56, 100, 104,
+        108, 97, 85, 79, 73, 70, 67, 64, 62, 60, 58, 56, 55, 55, 91, 95, 99, 90,
+        81, 76, 70, 67, 64, 62, 60, 58, 56, 55, 54, 54, 82, 86, 91, 84, 77, 72,
+        67, 64, 61, 59, 57, 56, 54, 53, 52, 52, 76, 80, 84, 78, 73, 69, 64, 62,
+        59, 57, 55, 54, 53, 52, 51, 51, 69, 73, 77, 73, 69, 66, 62, 60, 57, 55,
+        54, 53, 52, 51, 50, 50, 65, 69, 72, 69, 66, 63, 60, 58, 56, 54, 53, 52,
+        51, 50, 49, 49, 61, 64, 67, 65, 63, 60, 58, 56, 54, 53, 52, 51, 50, 49,
+        49, 49, 59, 61, 64, 62, 60, 58, 56, 55, 53, 52, 51, 50, 49, 49, 48, 48,
+        56, 58, 60, 59, 58, 56, 55, 54, 52, 51, 50, 49, 49, 48, 48, 48, 56, 58,
+        60, 59, 58, 56, 55, 54, 52, 51, 50, 49, 49, 48, 48, 48,
+        /* Size 32 */
+        107, 113, 120, 126, 133, 131, 129, 127, 125, 119, 113, 107, 102, 97, 92,
+        88, 83, 80, 77, 73, 70, 68, 66, 64, 62, 61, 59, 58, 57, 57, 57, 57, 113,
+        117, 121, 125, 129, 128, 127, 126, 125, 119, 114, 109, 104, 99, 94, 90,
+        85, 82, 79, 75, 72, 70, 68, 66, 64, 62, 61, 59, 58, 58, 58, 58, 120,
+        121, 123, 125, 126, 126, 125, 125, 124, 119, 115, 110, 105, 101, 96, 92,
+        87, 84, 81, 77, 74, 72, 69, 67, 65, 63, 62, 60, 59, 59, 59, 59, 126,
+        125, 125, 124, 123, 123, 123, 124, 124, 120, 116, 112, 107, 103, 98, 94,
+        90, 86, 83, 79, 76, 73, 71, 69, 66, 65, 63, 61, 60, 60, 60, 60, 133,
+        129, 126, 123, 119, 120, 121, 122, 123, 120, 116, 113, 109, 105, 101,
+        96, 92, 88, 85, 81, 78, 75, 73, 70, 68, 66, 64, 63, 61, 61, 61, 61, 131,
+        128, 126, 123, 120, 119, 118, 118, 117, 113, 110, 107, 104, 100, 96, 92,
+        88, 85, 82, 79, 76, 74, 71, 69, 67, 65, 64, 62, 60, 60, 60, 60, 129,
+        127, 125, 123, 121, 118, 116, 113, 110, 107, 104, 101, 98, 95, 91, 88,
+        85, 82, 79, 77, 74, 72, 70, 68, 66, 64, 63, 61, 60, 60, 60, 60, 127,
+        126, 125, 124, 122, 118, 113, 108, 103, 100, 98, 95, 92, 89, 87, 84, 81,
+        79, 77, 74, 72, 70, 68, 66, 65, 63, 62, 60, 59, 59, 59, 59, 125, 125,
+        124, 124, 123, 117, 110, 103, 96, 94, 91, 89, 86, 84, 82, 80, 78, 76,
+        74, 72, 70, 68, 67, 65, 63, 62, 61, 60, 58, 58, 58, 58, 119, 119, 119,
+        120, 120, 113, 107, 100, 94, 91, 88, 86, 83, 81, 79, 77, 75, 74, 72, 70,
+        68, 67, 65, 64, 62, 61, 60, 59, 58, 58, 58, 58, 113, 114, 115, 116, 116,
+        110, 104, 98, 91, 88, 86, 83, 80, 78, 76, 75, 73, 71, 70, 68, 66, 65,
+        64, 62, 61, 60, 59, 58, 57, 57, 57, 57, 107, 109, 110, 112, 113, 107,
+        101, 95, 89, 86, 83, 80, 77, 75, 74, 72, 70, 69, 67, 66, 65, 63, 62, 61,
+        60, 59, 58, 57, 56, 56, 56, 56, 102, 104, 105, 107, 109, 104, 98, 92,
+        86, 83, 80, 77, 74, 72, 71, 69, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58,
+        57, 56, 55, 55, 55, 55, 97, 99, 101, 103, 105, 100, 95, 89, 84, 81, 78,
+        75, 72, 71, 69, 68, 66, 65, 64, 63, 61, 61, 60, 59, 58, 57, 56, 56, 55,
+        55, 55, 55, 92, 94, 96, 98, 101, 96, 91, 87, 82, 79, 76, 74, 71, 69, 68,
+        66, 64, 63, 62, 61, 60, 59, 59, 58, 57, 56, 56, 55, 54, 54, 54, 54, 88,
+        90, 92, 94, 96, 92, 88, 84, 80, 77, 75, 72, 69, 68, 66, 64, 63, 62, 61,
+        60, 59, 58, 57, 57, 56, 55, 55, 54, 53, 53, 53, 53, 83, 85, 87, 90, 92,
+        88, 85, 81, 78, 75, 73, 70, 67, 66, 64, 63, 61, 60, 60, 59, 58, 57, 56,
+        56, 55, 54, 54, 53, 53, 53, 53, 53, 80, 82, 84, 86, 88, 85, 82, 79, 76,
+        74, 71, 69, 66, 65, 63, 62, 60, 60, 59, 58, 57, 56, 56, 55, 54, 54, 53,
+        53, 52, 52, 52, 52, 77, 79, 81, 83, 85, 82, 79, 77, 74, 72, 70, 67, 65,
+        64, 62, 61, 60, 59, 58, 57, 56, 55, 55, 54, 54, 53, 53, 52, 52, 52, 52,
+        52, 73, 75, 77, 79, 81, 79, 77, 74, 72, 70, 68, 66, 64, 63, 61, 60, 59,
+        58, 57, 56, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 51, 51, 70, 72, 74,
+        76, 78, 76, 74, 72, 70, 68, 66, 65, 63, 61, 60, 59, 58, 57, 56, 55, 54,
+        54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 68, 70, 72, 73, 75, 74, 72,
+        70, 68, 67, 65, 63, 62, 61, 59, 58, 57, 56, 55, 55, 54, 53, 53, 52, 52,
+        52, 51, 51, 50, 50, 50, 50, 66, 68, 69, 71, 73, 71, 70, 68, 67, 65, 64,
+        62, 61, 60, 59, 57, 56, 56, 55, 54, 53, 53, 52, 52, 51, 51, 51, 50, 50,
+        50, 50, 50, 64, 66, 67, 69, 70, 69, 68, 66, 65, 64, 62, 61, 60, 59, 58,
+        57, 56, 55, 54, 54, 53, 52, 52, 51, 51, 51, 50, 50, 50, 50, 50, 50, 62,
+        64, 65, 66, 68, 67, 66, 65, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 54,
+        53, 52, 52, 51, 51, 51, 50, 50, 50, 49, 49, 49, 49, 61, 62, 63, 65, 66,
+        65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 54, 53, 53, 52, 52, 51,
+        51, 50, 50, 50, 49, 49, 49, 49, 49, 59, 61, 62, 63, 64, 64, 63, 62, 61,
+        60, 59, 58, 57, 56, 56, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 49,
+        49, 49, 49, 49, 49, 58, 59, 60, 61, 63, 62, 61, 60, 60, 59, 58, 57, 56,
+        56, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 49, 49, 49, 49, 49, 49,
+        49, 57, 58, 59, 60, 61, 60, 60, 59, 58, 58, 57, 56, 55, 55, 54, 53, 53,
+        52, 52, 51, 51, 50, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 57, 58, 59,
+        60, 61, 60, 60, 59, 58, 58, 57, 56, 55, 55, 54, 53, 53, 52, 52, 51, 51,
+        50, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 57, 58, 59, 60, 61, 60, 60,
+        59, 58, 58, 57, 56, 55, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 49,
+        49, 49, 49, 48, 48, 48, 48, 57, 58, 59, 60, 61, 60, 60, 59, 58, 58, 57,
+        56, 55, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 49, 49, 49, 49, 48,
+        48, 48, 48 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 46, 44, 38, 46, 41, 40, 37, 44, 40, 35, 32, 38, 37, 32, 30,
+        /* Size 8 */
+        64, 75, 49, 47, 45, 42, 40, 37, 75, 57, 49, 52, 51, 48, 44, 41, 49, 49,
+        44, 45, 45, 44, 42, 39, 47, 52, 45, 42, 41, 40, 38, 37, 45, 51, 45, 41,
+        38, 37, 36, 35, 42, 48, 44, 40, 37, 35, 34, 33, 40, 44, 42, 38, 36, 34,
+        33, 32, 37, 41, 39, 37, 35, 33, 32, 31,
+        /* Size 16 */
+        64, 70, 75, 62, 49, 48, 47, 46, 45, 44, 42, 41, 40, 38, 37, 37, 70, 68,
+        66, 58, 49, 49, 50, 49, 48, 47, 45, 44, 42, 40, 39, 39, 75, 66, 57, 53,
+        49, 50, 52, 51, 51, 49, 48, 46, 44, 43, 41, 41, 62, 58, 53, 50, 46, 47,
+        48, 48, 48, 47, 46, 44, 43, 42, 40, 40, 49, 49, 49, 46, 44, 44, 45, 45,
+        45, 44, 44, 43, 42, 40, 39, 39, 48, 49, 50, 47, 44, 44, 43, 43, 43, 42,
+        42, 41, 40, 39, 38, 38, 47, 50, 52, 48, 45, 43, 42, 41, 41, 40, 40, 39,
+        38, 38, 37, 37, 46, 49, 51, 48, 45, 43, 41, 40, 39, 39, 38, 38, 37, 36,
+        36, 36, 45, 48, 51, 48, 45, 43, 41, 39, 38, 37, 37, 36, 36, 35, 35, 35,
+        44, 47, 49, 47, 44, 42, 40, 39, 37, 37, 36, 35, 35, 34, 34, 34, 42, 45,
+        48, 46, 44, 42, 40, 38, 37, 36, 35, 34, 34, 33, 33, 33, 41, 44, 46, 44,
+        43, 41, 39, 38, 36, 35, 34, 34, 33, 33, 32, 32, 40, 42, 44, 43, 42, 40,
+        38, 37, 36, 35, 34, 33, 33, 32, 32, 32, 38, 40, 43, 42, 40, 39, 38, 36,
+        35, 34, 33, 33, 32, 32, 31, 31, 37, 39, 41, 40, 39, 38, 37, 36, 35, 34,
+        33, 32, 32, 31, 31, 31, 37, 39, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
+        32, 31, 31, 31,
+        /* Size 32 */
+        64, 67, 70, 72, 75, 69, 62, 56, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45,
+        44, 43, 42, 42, 41, 40, 40, 39, 38, 38, 37, 37, 37, 37, 67, 68, 69, 70,
+        71, 65, 60, 54, 49, 49, 49, 49, 48, 48, 48, 47, 47, 46, 45, 45, 44, 43,
+        42, 41, 41, 40, 39, 39, 38, 38, 38, 38, 70, 69, 68, 67, 66, 62, 58, 53,
+        49, 49, 49, 49, 50, 49, 49, 48, 48, 47, 47, 46, 45, 44, 44, 43, 42, 41,
+        40, 40, 39, 39, 39, 39, 72, 70, 67, 64, 62, 59, 55, 52, 49, 49, 50, 50,
+        51, 50, 50, 50, 49, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, 40,
+        40, 40, 75, 71, 66, 62, 57, 55, 53, 51, 49, 49, 50, 51, 52, 52, 51, 51,
+        51, 50, 49, 49, 48, 47, 46, 45, 44, 44, 43, 42, 41, 41, 41, 41, 69, 65,
+        62, 59, 55, 53, 51, 49, 47, 48, 49, 49, 50, 50, 50, 50, 49, 49, 48, 47,
+        47, 46, 45, 44, 44, 43, 42, 41, 41, 41, 41, 41, 62, 60, 58, 55, 53, 51,
+        50, 48, 46, 47, 47, 48, 48, 48, 48, 48, 48, 47, 47, 46, 46, 45, 44, 44,
+        43, 42, 42, 41, 40, 40, 40, 40, 56, 54, 53, 52, 51, 49, 48, 47, 45, 46,
+        46, 46, 47, 47, 47, 47, 46, 46, 46, 45, 45, 44, 43, 43, 42, 42, 41, 40,
+        40, 40, 40, 40, 49, 49, 49, 49, 49, 47, 46, 45, 44, 44, 44, 45, 45, 45,
+        45, 45, 45, 45, 44, 44, 44, 43, 43, 42, 42, 41, 40, 40, 39, 39, 39, 39,
+        49, 49, 49, 49, 49, 48, 47, 46, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+        43, 43, 43, 42, 42, 41, 41, 40, 40, 39, 39, 39, 39, 39, 48, 49, 49, 50,
+        50, 49, 47, 46, 44, 44, 44, 44, 43, 43, 43, 43, 43, 43, 42, 42, 42, 41,
+        41, 40, 40, 39, 39, 38, 38, 38, 38, 38, 48, 49, 49, 50, 51, 49, 48, 46,
+        45, 44, 44, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 40, 40, 39, 39, 39,
+        38, 38, 37, 37, 37, 37, 47, 48, 50, 51, 52, 50, 48, 47, 45, 44, 43, 43,
+        42, 41, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 38, 38, 38, 37, 37, 37,
+        37, 37, 47, 48, 49, 50, 52, 50, 48, 47, 45, 44, 43, 42, 41, 41, 41, 40,
+        40, 40, 39, 39, 39, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 36, 46, 48,
+        49, 50, 51, 50, 48, 47, 45, 44, 43, 42, 41, 41, 40, 40, 39, 39, 39, 38,
+        38, 38, 38, 37, 37, 37, 36, 36, 36, 36, 36, 36, 46, 47, 48, 50, 51, 50,
+        48, 47, 45, 44, 43, 42, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37,
+        36, 36, 36, 35, 35, 35, 35, 35, 45, 47, 48, 49, 51, 49, 48, 46, 45, 44,
+        43, 42, 41, 40, 39, 39, 38, 38, 37, 37, 37, 37, 36, 36, 36, 35, 35, 35,
+        35, 35, 35, 35, 45, 46, 47, 49, 50, 49, 47, 46, 45, 44, 43, 41, 40, 40,
+        39, 38, 38, 37, 37, 37, 36, 36, 36, 36, 35, 35, 35, 34, 34, 34, 34, 34,
+        44, 45, 47, 48, 49, 48, 47, 46, 44, 43, 42, 41, 40, 39, 39, 38, 37, 37,
+        37, 36, 36, 36, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34, 43, 45, 46, 47,
+        49, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 38, 37, 37, 36, 36, 35, 35,
+        35, 35, 34, 34, 34, 34, 33, 33, 33, 33, 42, 44, 45, 46, 48, 47, 46, 45,
+        44, 43, 42, 41, 40, 39, 38, 37, 37, 36, 36, 35, 35, 35, 34, 34, 34, 34,
+        33, 33, 33, 33, 33, 33, 42, 43, 44, 46, 47, 46, 45, 44, 43, 42, 41, 40,
+        39, 39, 38, 37, 37, 36, 36, 35, 35, 34, 34, 34, 34, 33, 33, 33, 33, 33,
+        33, 33, 41, 42, 44, 45, 46, 45, 44, 43, 43, 42, 41, 40, 39, 38, 38, 37,
+        36, 36, 35, 35, 34, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 40, 41,
+        43, 44, 45, 44, 44, 43, 42, 41, 40, 39, 39, 38, 37, 37, 36, 36, 35, 35,
+        34, 34, 34, 33, 33, 33, 32, 32, 32, 32, 32, 32, 40, 41, 42, 43, 44, 44,
+        43, 42, 42, 41, 40, 39, 38, 38, 37, 36, 36, 35, 35, 34, 34, 34, 33, 33,
+        33, 32, 32, 32, 32, 32, 32, 32, 39, 40, 41, 42, 44, 43, 42, 42, 41, 40,
+        39, 39, 38, 37, 37, 36, 35, 35, 35, 34, 34, 33, 33, 33, 32, 32, 32, 32,
+        31, 31, 31, 31, 38, 39, 40, 42, 43, 42, 42, 41, 40, 40, 39, 38, 38, 37,
+        36, 36, 35, 35, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 31,
+        38, 39, 40, 41, 42, 41, 41, 40, 40, 39, 38, 38, 37, 37, 36, 35, 35, 34,
+        34, 34, 33, 33, 33, 32, 32, 32, 31, 31, 31, 31, 31, 31, 37, 38, 39, 40,
+        41, 41, 40, 40, 39, 39, 38, 37, 37, 36, 36, 35, 35, 34, 34, 33, 33, 33,
+        32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 37, 38, 39, 40, 41, 41, 40, 40,
+        39, 39, 38, 37, 37, 36, 36, 35, 35, 34, 34, 33, 33, 33, 32, 32, 32, 31,
+        31, 31, 31, 31, 31, 31, 37, 38, 39, 40, 41, 41, 40, 40, 39, 39, 38, 37,
+        37, 36, 36, 35, 35, 34, 34, 33, 33, 33, 32, 32, 32, 31, 31, 31, 31, 31,
+        31, 31, 37, 38, 39, 40, 41, 41, 40, 40, 39, 39, 38, 37, 37, 36, 36, 35,
+        35, 34, 34, 33, 33, 33, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31 },
+      { /* Intra matrices */
+        /* Size 4 */
+        110, 78, 74, 64, 78, 69, 66, 61, 74, 66, 57, 53, 64, 61, 53, 49,
+        /* Size 8 */
+        103, 122, 78, 75, 72, 67, 62, 58, 122, 92, 77, 83, 81, 76, 70, 65, 78,
+        77, 70, 71, 71, 69, 66, 62, 75, 83, 71, 66, 64, 62, 60, 58, 72, 81, 71,
+        64, 60, 58, 56, 54, 67, 76, 69, 62, 58, 55, 53, 51, 62, 70, 66, 60, 56,
+        53, 50, 49, 58, 65, 62, 58, 54, 51, 49, 47,
+        /* Size 16 */
+        105, 115, 124, 102, 80, 78, 77, 75, 73, 71, 68, 66, 63, 61, 59, 59, 115,
+        112, 109, 94, 79, 80, 80, 79, 78, 75, 73, 70, 67, 65, 62, 62, 124, 109,
+        94, 86, 79, 82, 84, 83, 83, 80, 77, 74, 72, 69, 66, 66, 102, 94, 86, 81,
+        75, 77, 78, 78, 78, 76, 74, 71, 69, 67, 64, 64, 80, 79, 79, 75, 71, 72,
+        73, 73, 73, 71, 70, 68, 67, 65, 63, 63, 78, 80, 82, 77, 72, 71, 70, 69,
+        69, 68, 67, 65, 64, 62, 61, 61, 77, 80, 84, 78, 73, 70, 67, 66, 65, 64,
+        63, 62, 61, 60, 59, 59, 75, 79, 83, 78, 73, 69, 66, 64, 63, 62, 61, 60,
+        59, 58, 57, 57, 73, 78, 83, 78, 73, 69, 65, 63, 61, 60, 59, 58, 57, 56,
+        55, 55, 71, 75, 80, 76, 71, 68, 64, 62, 60, 58, 57, 56, 55, 54, 53, 53,
+        68, 73, 77, 74, 70, 67, 63, 61, 59, 57, 55, 54, 53, 53, 52, 52, 66, 70,
+        74, 71, 68, 65, 62, 60, 58, 56, 54, 53, 52, 52, 51, 51, 63, 67, 72, 69,
+        67, 64, 61, 59, 57, 55, 53, 52, 51, 51, 50, 50, 61, 65, 69, 67, 65, 62,
+        60, 58, 56, 54, 53, 52, 51, 50, 49, 49, 59, 62, 66, 64, 63, 61, 59, 57,
+        55, 53, 52, 51, 50, 49, 48, 48, 59, 62, 66, 64, 63, 61, 59, 57, 55, 53,
+        52, 51, 50, 49, 48, 48,
+        /* Size 32 */
+        106, 111, 116, 121, 126, 114, 103, 92, 80, 80, 79, 78, 77, 76, 76, 75,
+        74, 73, 71, 70, 69, 68, 66, 65, 64, 63, 62, 60, 59, 59, 59, 59, 111,
+        113, 114, 116, 118, 108, 99, 90, 80, 80, 80, 79, 79, 79, 78, 77, 76, 75,
+        74, 72, 71, 70, 69, 67, 66, 65, 64, 62, 61, 61, 61, 61, 116, 114, 113,
+        112, 110, 103, 95, 87, 80, 80, 81, 81, 81, 81, 80, 79, 79, 77, 76, 75,
+        73, 72, 71, 69, 68, 67, 65, 64, 63, 63, 63, 63, 121, 116, 112, 107, 102,
+        97, 91, 85, 80, 81, 81, 82, 83, 83, 82, 82, 81, 80, 78, 77, 76, 74, 73,
+        71, 70, 69, 67, 66, 65, 65, 65, 65, 126, 118, 110, 102, 95, 91, 87, 83,
+        79, 81, 82, 84, 85, 85, 84, 84, 83, 82, 81, 79, 78, 77, 75, 74, 72, 71,
+        69, 68, 67, 67, 67, 67, 114, 108, 103, 97, 91, 87, 84, 81, 77, 79, 80,
+        81, 82, 82, 81, 81, 81, 80, 79, 77, 76, 75, 74, 72, 71, 70, 68, 67, 66,
+        66, 66, 66, 103, 99, 95, 91, 87, 84, 81, 78, 75, 76, 77, 78, 79, 79, 79,
+        79, 78, 77, 76, 75, 75, 73, 72, 71, 70, 68, 67, 66, 65, 65, 65, 65, 92,
+        90, 87, 85, 83, 81, 78, 76, 74, 74, 75, 75, 76, 76, 76, 76, 76, 75, 74,
+        73, 73, 72, 71, 70, 68, 67, 66, 65, 64, 64, 64, 64, 80, 80, 80, 80, 79,
+        77, 75, 74, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 72, 72, 71, 70, 69,
+        68, 67, 66, 65, 64, 63, 63, 63, 63, 80, 80, 80, 81, 81, 79, 76, 74, 72,
+        72, 72, 72, 72, 72, 72, 71, 71, 71, 70, 70, 69, 68, 68, 67, 66, 65, 64,
+        63, 62, 62, 62, 62, 79, 80, 81, 81, 82, 80, 77, 75, 72, 72, 71, 71, 70,
+        70, 70, 70, 69, 69, 68, 68, 67, 67, 66, 65, 64, 64, 63, 62, 61, 61, 61,
+        61, 78, 79, 81, 82, 84, 81, 78, 75, 73, 72, 71, 70, 69, 69, 68, 68, 68,
+        67, 67, 66, 66, 65, 64, 64, 63, 62, 62, 61, 60, 60, 60, 60, 77, 79, 81,
+        83, 85, 82, 79, 76, 73, 72, 70, 69, 68, 67, 67, 66, 66, 65, 65, 64, 64,
+        63, 63, 62, 62, 61, 60, 60, 59, 59, 59, 59, 76, 79, 81, 83, 85, 82, 79,
+        76, 73, 72, 70, 69, 67, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 61, 61,
+        60, 59, 59, 58, 58, 58, 58, 76, 78, 80, 82, 84, 81, 79, 76, 73, 72, 70,
+        68, 67, 66, 65, 64, 64, 63, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 57,
+        57, 57, 57, 75, 77, 79, 82, 84, 81, 79, 76, 73, 71, 70, 68, 66, 65, 64,
+        63, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 57, 57, 56, 56, 56, 56, 74,
+        76, 79, 81, 83, 81, 78, 76, 73, 71, 69, 68, 66, 65, 64, 62, 61, 61, 60,
+        60, 59, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 55, 73, 75, 77, 80, 82,
+        80, 77, 75, 73, 71, 69, 67, 65, 64, 63, 62, 61, 60, 60, 59, 58, 58, 57,
+        57, 56, 56, 55, 55, 55, 55, 55, 55, 71, 74, 76, 78, 81, 79, 76, 74, 72,
+        70, 68, 67, 65, 64, 62, 61, 60, 60, 59, 58, 58, 57, 57, 56, 56, 55, 55,
+        54, 54, 54, 54, 54, 70, 72, 75, 77, 79, 77, 75, 73, 72, 70, 68, 66, 64,
+        63, 62, 61, 60, 59, 58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53,
+        53, 69, 71, 73, 76, 78, 76, 75, 73, 71, 69, 67, 66, 64, 63, 61, 60, 59,
+        58, 58, 57, 56, 55, 55, 54, 54, 54, 53, 53, 52, 52, 52, 52, 68, 70, 72,
+        74, 77, 75, 73, 72, 70, 68, 67, 65, 63, 62, 61, 60, 59, 58, 57, 56, 55,
+        55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 66, 69, 71, 73, 75, 74, 72,
+        71, 69, 68, 66, 64, 63, 62, 60, 59, 58, 57, 57, 56, 55, 54, 54, 53, 53,
+        52, 52, 52, 51, 51, 51, 51, 65, 67, 69, 71, 74, 72, 71, 70, 68, 67, 65,
+        64, 62, 61, 60, 59, 58, 57, 56, 55, 54, 54, 53, 53, 52, 52, 52, 51, 51,
+        51, 51, 51, 64, 66, 68, 70, 72, 71, 70, 68, 67, 66, 64, 63, 62, 61, 59,
+        58, 57, 56, 56, 55, 54, 53, 53, 52, 52, 51, 51, 51, 50, 50, 50, 50, 63,
+        65, 67, 69, 71, 70, 68, 67, 66, 65, 64, 62, 61, 60, 59, 58, 57, 56, 55,
+        54, 54, 53, 52, 52, 51, 51, 51, 50, 50, 50, 50, 50, 62, 64, 65, 67, 69,
+        68, 67, 66, 65, 64, 63, 62, 60, 59, 58, 57, 56, 55, 55, 54, 53, 53, 52,
+        52, 51, 51, 50, 50, 49, 49, 49, 49, 60, 62, 64, 66, 68, 67, 66, 65, 64,
+        63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 54, 53, 52, 52, 51, 51, 50, 50,
+        49, 49, 49, 49, 49, 59, 61, 63, 65, 67, 66, 65, 64, 63, 62, 61, 60, 59,
+        58, 57, 56, 55, 55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 49,
+        49, 59, 61, 63, 65, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55,
+        55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 49, 49, 59, 61, 63,
+        65, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 55, 54, 53, 52,
+        52, 51, 51, 50, 50, 49, 49, 49, 49, 49, 49, 59, 61, 63, 65, 67, 66, 65,
+        64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 55, 54, 53, 52, 52, 51, 51, 50,
+        50, 49, 49, 49, 49, 49, 49 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 60, 45, 36, 60, 47, 40, 35, 45, 40, 34, 32, 36, 35, 32, 31,
+        /* Size 8 */
+        64, 77, 73, 61, 52, 45, 41, 38, 77, 70, 73, 65, 56, 49, 44, 40, 73, 73,
+        58, 53, 49, 45, 42, 39, 61, 65, 53, 47, 44, 41, 39, 38, 52, 56, 49, 44,
+        41, 39, 37, 36, 45, 49, 45, 41, 39, 37, 36, 35, 41, 44, 42, 39, 37, 36,
+        35, 34, 38, 40, 39, 38, 36, 35, 34, 34,
+        /* Size 16 */
+        64, 71, 77, 75, 73, 67, 61, 56, 52, 48, 45, 43, 41, 40, 38, 38, 71, 72,
+        74, 73, 73, 68, 63, 59, 54, 51, 47, 45, 42, 41, 39, 39, 77, 74, 70, 71,
+        73, 69, 65, 61, 56, 53, 49, 46, 44, 42, 40, 40, 75, 73, 71, 69, 66, 62,
+        59, 56, 53, 50, 47, 45, 43, 41, 40, 40, 73, 73, 73, 66, 58, 56, 53, 51,
+        49, 47, 45, 43, 42, 40, 39, 39, 67, 68, 69, 62, 56, 53, 50, 48, 46, 45,
+        43, 42, 41, 39, 38, 38, 61, 63, 65, 59, 53, 50, 47, 45, 44, 43, 41, 40,
+        39, 38, 38, 38, 56, 59, 61, 56, 51, 48, 45, 44, 42, 41, 40, 39, 38, 38,
+        37, 37, 52, 54, 56, 53, 49, 46, 44, 42, 41, 40, 39, 38, 37, 37, 36, 36,
+        48, 51, 53, 50, 47, 45, 43, 41, 40, 39, 38, 37, 37, 36, 36, 36, 45, 47,
+        49, 47, 45, 43, 41, 40, 39, 38, 37, 37, 36, 36, 35, 35, 43, 45, 46, 45,
+        43, 42, 40, 39, 38, 37, 37, 36, 36, 35, 35, 35, 41, 42, 44, 43, 42, 41,
+        39, 38, 37, 37, 36, 36, 35, 35, 34, 34, 40, 41, 42, 41, 40, 39, 38, 38,
+        37, 36, 36, 35, 35, 34, 34, 34, 38, 39, 40, 40, 39, 38, 38, 37, 36, 36,
+        35, 35, 34, 34, 34, 34, 38, 39, 40, 40, 39, 38, 38, 37, 36, 36, 35, 35,
+        34, 34, 34, 34,
+        /* Size 32 */
+        64, 67, 71, 74, 77, 76, 75, 74, 73, 70, 67, 64, 61, 59, 56, 54, 52, 50,
+        48, 47, 45, 44, 43, 42, 41, 40, 40, 39, 38, 38, 38, 38, 67, 69, 71, 74,
+        76, 75, 74, 74, 73, 70, 68, 65, 62, 60, 58, 55, 53, 51, 50, 48, 46, 45,
+        44, 43, 42, 41, 40, 40, 39, 39, 39, 39, 71, 71, 72, 73, 74, 74, 73, 73,
+        73, 71, 68, 66, 63, 61, 59, 56, 54, 52, 51, 49, 47, 46, 45, 44, 42, 42,
+        41, 40, 39, 39, 39, 39, 74, 74, 73, 73, 72, 72, 72, 73, 73, 71, 69, 66,
+        64, 62, 60, 57, 55, 53, 52, 50, 48, 47, 46, 44, 43, 42, 42, 41, 40, 40,
+        40, 40, 77, 76, 74, 72, 70, 71, 71, 72, 73, 71, 69, 67, 65, 63, 61, 59,
+        56, 54, 53, 51, 49, 48, 46, 45, 44, 43, 42, 41, 40, 40, 40, 40, 76, 75,
+        74, 72, 71, 70, 70, 70, 69, 67, 66, 64, 62, 60, 58, 56, 54, 53, 51, 50,
+        48, 47, 46, 45, 43, 43, 42, 41, 40, 40, 40, 40, 75, 74, 73, 72, 71, 70,
+        69, 67, 66, 64, 62, 61, 59, 58, 56, 54, 53, 51, 50, 48, 47, 46, 45, 44,
+        43, 42, 41, 41, 40, 40, 40, 40, 74, 74, 73, 73, 72, 70, 67, 65, 62, 61,
+        59, 58, 56, 55, 54, 52, 51, 50, 49, 47, 46, 45, 44, 43, 42, 42, 41, 40,
+        39, 39, 39, 39, 73, 73, 73, 73, 73, 69, 66, 62, 58, 57, 56, 55, 53, 52,
+        51, 50, 49, 48, 47, 46, 45, 44, 43, 43, 42, 41, 40, 40, 39, 39, 39, 39,
+        70, 70, 71, 71, 71, 67, 64, 61, 57, 56, 55, 53, 52, 51, 50, 49, 48, 47,
+        46, 45, 44, 43, 43, 42, 41, 41, 40, 39, 39, 39, 39, 39, 67, 68, 68, 69,
+        69, 66, 62, 59, 56, 55, 53, 52, 50, 49, 48, 47, 46, 46, 45, 44, 43, 43,
+        42, 41, 41, 40, 39, 39, 38, 38, 38, 38, 64, 65, 66, 66, 67, 64, 61, 58,
+        55, 53, 52, 50, 49, 48, 47, 46, 45, 44, 44, 43, 42, 42, 41, 41, 40, 39,
+        39, 38, 38, 38, 38, 38, 61, 62, 63, 64, 65, 62, 59, 56, 53, 52, 50, 49,
+        47, 46, 45, 45, 44, 43, 43, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38, 38,
+        38, 38, 59, 60, 61, 62, 63, 60, 58, 55, 52, 51, 49, 48, 46, 45, 45, 44,
+        43, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37, 56, 58,
+        59, 60, 61, 58, 56, 54, 51, 50, 48, 47, 45, 45, 44, 43, 42, 42, 41, 41,
+        40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37, 37, 54, 55, 56, 57, 59, 56,
+        54, 52, 50, 49, 47, 46, 45, 44, 43, 42, 41, 41, 40, 40, 39, 39, 39, 38,
+        38, 38, 37, 37, 37, 37, 37, 37, 52, 53, 54, 55, 56, 54, 53, 51, 49, 48,
+        46, 45, 44, 43, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37,
+        36, 36, 36, 36, 50, 51, 52, 53, 54, 53, 51, 50, 48, 47, 46, 44, 43, 42,
+        42, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37, 36, 36, 36, 36, 36,
+        48, 50, 51, 52, 53, 51, 50, 49, 47, 46, 45, 44, 43, 42, 41, 40, 40, 39,
+        39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 36, 36, 36, 36, 47, 48, 49, 50,
+        51, 50, 48, 47, 46, 45, 44, 43, 42, 41, 41, 40, 39, 39, 38, 38, 38, 37,
+        37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 45, 46, 47, 48, 49, 48, 47, 46,
+        45, 44, 43, 42, 41, 41, 40, 39, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36,
+        36, 35, 35, 35, 35, 35, 44, 45, 46, 47, 48, 47, 46, 45, 44, 43, 43, 42,
+        41, 40, 40, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35,
+        35, 35, 43, 44, 45, 46, 46, 46, 45, 44, 43, 43, 42, 41, 40, 40, 39, 39,
+        38, 38, 37, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 35, 35, 35, 42, 43,
+        44, 44, 45, 45, 44, 43, 43, 42, 41, 41, 40, 39, 39, 38, 38, 37, 37, 37,
+        36, 36, 36, 36, 35, 35, 35, 35, 35, 35, 35, 35, 41, 42, 42, 43, 44, 43,
+        43, 42, 42, 41, 41, 40, 39, 39, 38, 38, 37, 37, 37, 36, 36, 36, 36, 35,
+        35, 35, 35, 35, 34, 34, 34, 34, 40, 41, 42, 42, 43, 43, 42, 42, 41, 41,
+        40, 39, 39, 38, 38, 38, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 35, 34,
+        34, 34, 34, 34, 40, 40, 41, 42, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38,
+        38, 37, 37, 37, 36, 36, 36, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34,
+        39, 40, 40, 41, 41, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 36,
+        36, 36, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34, 34, 38, 39, 39, 40,
+        40, 40, 40, 39, 39, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 35, 35, 35,
+        35, 35, 34, 34, 34, 34, 34, 34, 34, 34, 38, 39, 39, 40, 40, 40, 40, 39,
+        39, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 35, 35, 35, 35, 35, 34, 34,
+        34, 34, 34, 34, 34, 34, 38, 39, 39, 40, 40, 40, 40, 39, 39, 39, 38, 38,
+        38, 37, 37, 37, 36, 36, 36, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34,
+        34, 34, 38, 39, 39, 40, 40, 40, 40, 39, 39, 39, 38, 38, 38, 37, 37, 37,
+        36, 36, 36, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34, 34, 34 },
+      { /* Intra matrices */
+        /* Size 4 */
+        109, 102, 75, 59, 102, 79, 66, 57, 75, 66, 56, 52, 59, 57, 52, 49,
+        /* Size 8 */
+        96, 117, 111, 92, 77, 67, 60, 55, 117, 106, 110, 98, 84, 73, 64, 59,
+        111, 110, 87, 80, 73, 66, 61, 57, 92, 98, 80, 69, 64, 60, 57, 55, 77,
+        84, 73, 64, 59, 56, 54, 52, 67, 73, 66, 60, 56, 54, 52, 51, 60, 64, 61,
+        57, 54, 52, 51, 49, 55, 59, 57, 55, 52, 51, 49, 49,
+        /* Size 16 */
+        98, 109, 120, 116, 113, 103, 94, 86, 78, 73, 68, 65, 61, 59, 57, 57,
+        109, 111, 114, 113, 112, 105, 97, 89, 82, 76, 71, 67, 63, 61, 58, 58,
+        120, 114, 108, 110, 112, 106, 100, 93, 86, 80, 74, 70, 66, 63, 60, 60,
+        116, 113, 110, 105, 101, 96, 91, 85, 80, 75, 71, 67, 64, 62, 59, 59,
+        113, 112, 112, 101, 89, 85, 81, 78, 74, 71, 68, 65, 62, 60, 58, 58, 103,
+        105, 106, 96, 85, 81, 76, 73, 70, 67, 65, 62, 60, 59, 57, 57, 94, 97,
+        100, 91, 81, 76, 71, 68, 66, 64, 62, 60, 58, 57, 56, 56, 86, 89, 93, 85,
+        78, 73, 68, 66, 63, 61, 60, 58, 57, 56, 55, 55, 78, 82, 86, 80, 74, 70,
+        66, 63, 61, 59, 57, 56, 55, 54, 53, 53, 73, 76, 80, 75, 71, 67, 64, 61,
+        59, 58, 56, 55, 54, 53, 53, 53, 68, 71, 74, 71, 68, 65, 62, 60, 57, 56,
+        55, 54, 53, 52, 52, 52, 65, 67, 70, 67, 65, 62, 60, 58, 56, 55, 54, 53,
+        52, 52, 51, 51, 61, 63, 66, 64, 62, 60, 58, 57, 55, 54, 53, 52, 52, 51,
+        51, 51, 59, 61, 63, 62, 60, 59, 57, 56, 54, 53, 52, 52, 51, 51, 50, 50,
+        57, 58, 60, 59, 58, 57, 56, 55, 53, 53, 52, 51, 51, 50, 50, 50, 57, 58,
+        60, 59, 58, 57, 56, 55, 53, 53, 52, 51, 51, 50, 50, 50,
+        /* Size 32 */
+        99, 105, 110, 115, 121, 119, 118, 116, 114, 109, 104, 100, 95, 91, 87,
+        83, 79, 76, 74, 71, 69, 67, 65, 63, 62, 61, 59, 58, 57, 57, 57, 57, 105,
+        108, 111, 115, 118, 117, 116, 115, 114, 110, 105, 101, 96, 93, 89, 85,
+        81, 78, 76, 73, 70, 68, 67, 65, 63, 62, 61, 59, 58, 58, 58, 58, 110,
+        111, 113, 114, 115, 115, 114, 114, 114, 110, 106, 102, 98, 94, 90, 87,
+        83, 80, 77, 74, 72, 70, 68, 66, 64, 63, 62, 60, 59, 59, 59, 59, 115,
+        115, 114, 113, 112, 113, 113, 113, 113, 110, 106, 103, 100, 96, 92, 88,
+        85, 82, 79, 76, 73, 71, 69, 67, 65, 64, 63, 61, 60, 60, 60, 60, 121,
+        118, 115, 112, 110, 110, 111, 112, 113, 110, 107, 104, 101, 98, 94, 90,
+        87, 84, 81, 78, 75, 73, 71, 69, 67, 65, 64, 62, 61, 61, 61, 61, 119,
+        117, 115, 113, 110, 110, 109, 108, 107, 105, 102, 99, 96, 93, 90, 87,
+        84, 81, 78, 76, 73, 71, 69, 68, 66, 64, 63, 62, 60, 60, 60, 60, 118,
+        116, 114, 113, 111, 109, 106, 104, 102, 99, 97, 94, 92, 89, 86, 83, 81,
+        78, 76, 74, 72, 70, 68, 66, 65, 63, 62, 61, 60, 60, 60, 60, 116, 115,
+        114, 113, 112, 108, 104, 100, 96, 94, 91, 89, 87, 85, 82, 80, 78, 76,
+        74, 72, 70, 68, 67, 65, 64, 63, 62, 60, 59, 59, 59, 59, 114, 114, 114,
+        113, 113, 107, 102, 96, 90, 88, 86, 84, 82, 80, 79, 77, 75, 73, 72, 70,
+        68, 67, 66, 64, 63, 62, 61, 60, 59, 59, 59, 59, 109, 110, 110, 110, 110,
+        105, 99, 94, 88, 86, 84, 82, 79, 78, 76, 74, 73, 71, 70, 68, 67, 66, 64,
+        63, 62, 61, 60, 59, 58, 58, 58, 58, 104, 105, 106, 106, 107, 102, 97,
+        91, 86, 84, 81, 79, 77, 75, 74, 72, 71, 69, 68, 67, 65, 64, 63, 62, 61,
+        60, 59, 58, 57, 57, 57, 57, 100, 101, 102, 103, 104, 99, 94, 89, 84, 82,
+        79, 77, 74, 73, 71, 70, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 58,
+        57, 57, 57, 57, 95, 96, 98, 100, 101, 96, 92, 87, 82, 79, 77, 74, 72,
+        70, 69, 68, 66, 65, 64, 63, 62, 61, 61, 60, 59, 58, 58, 57, 56, 56, 56,
+        56, 91, 93, 94, 96, 98, 93, 89, 85, 80, 78, 75, 73, 70, 69, 68, 66, 65,
+        64, 63, 62, 61, 60, 60, 59, 58, 58, 57, 56, 56, 56, 56, 56, 87, 89, 90,
+        92, 94, 90, 86, 82, 79, 76, 74, 71, 69, 68, 66, 65, 64, 63, 62, 61, 60,
+        59, 59, 58, 57, 57, 56, 56, 55, 55, 55, 55, 83, 85, 87, 88, 90, 87, 83,
+        80, 77, 74, 72, 70, 68, 66, 65, 64, 62, 62, 61, 60, 59, 59, 58, 57, 57,
+        56, 56, 55, 55, 55, 55, 55, 79, 81, 83, 85, 87, 84, 81, 78, 75, 73, 71,
+        68, 66, 65, 64, 62, 61, 60, 60, 59, 58, 58, 57, 56, 56, 55, 55, 54, 54,
+        54, 54, 54, 76, 78, 80, 82, 84, 81, 78, 76, 73, 71, 69, 67, 65, 64, 63,
+        62, 60, 60, 59, 58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 54, 54, 54, 74,
+        76, 77, 79, 81, 78, 76, 74, 72, 70, 68, 66, 64, 63, 62, 61, 60, 59, 58,
+        57, 57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53, 53, 71, 73, 74, 76, 78,
+        76, 74, 72, 70, 68, 67, 65, 63, 62, 61, 60, 59, 58, 57, 57, 56, 56, 55,
+        55, 54, 54, 53, 53, 53, 53, 53, 53, 69, 70, 72, 73, 75, 73, 72, 70, 68,
+        67, 65, 64, 62, 61, 60, 59, 58, 57, 57, 56, 55, 55, 55, 54, 54, 53, 53,
+        53, 52, 52, 52, 52, 67, 68, 70, 71, 73, 71, 70, 68, 67, 66, 64, 63, 61,
+        60, 59, 59, 58, 57, 56, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52,
+        52, 65, 67, 68, 69, 71, 69, 68, 67, 66, 64, 63, 62, 61, 60, 59, 58, 57,
+        56, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 52, 63, 65, 66,
+        67, 69, 68, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 56, 55, 55, 54,
+        54, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51, 62, 63, 64, 65, 67, 66, 65,
+        64, 63, 62, 61, 60, 59, 58, 57, 57, 56, 55, 55, 54, 54, 53, 53, 52, 52,
+        52, 52, 51, 51, 51, 51, 51, 61, 62, 63, 64, 65, 64, 63, 63, 62, 61, 60,
+        59, 58, 58, 57, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 51, 51, 51,
+        51, 51, 51, 59, 61, 62, 63, 64, 63, 62, 62, 61, 60, 59, 58, 58, 57, 56,
+        56, 55, 54, 54, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 58,
+        59, 60, 61, 62, 62, 61, 60, 60, 59, 58, 58, 57, 56, 56, 55, 54, 54, 54,
+        53, 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 57, 58, 59, 60, 61,
+        60, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55, 54, 54, 53, 53, 52, 52, 52,
+        51, 51, 51, 51, 50, 50, 50, 50, 50, 57, 58, 59, 60, 61, 60, 60, 59, 59,
+        58, 57, 57, 56, 56, 55, 55, 54, 54, 53, 53, 52, 52, 52, 51, 51, 51, 51,
+        50, 50, 50, 50, 50, 57, 58, 59, 60, 61, 60, 60, 59, 59, 58, 57, 57, 56,
+        56, 55, 55, 54, 54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50,
+        50, 57, 58, 59, 60, 61, 60, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55, 54,
+        54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 48, 46, 40, 48, 43, 42, 39, 46, 42, 37, 35, 40, 39, 35, 33,
+        /* Size 8 */
+        64, 74, 50, 49, 47, 44, 42, 39, 74, 58, 50, 53, 52, 49, 46, 43, 50, 50,
+        46, 47, 47, 45, 44, 41, 49, 53, 47, 44, 43, 42, 41, 39, 47, 52, 47, 43,
+        40, 39, 38, 37, 44, 49, 45, 42, 39, 38, 37, 36, 42, 46, 44, 41, 38, 37,
+        35, 35, 39, 43, 41, 39, 37, 36, 35, 34,
+        /* Size 16 */
+        64, 69, 74, 62, 50, 50, 49, 48, 47, 46, 44, 43, 42, 41, 39, 39, 69, 68,
+        66, 58, 50, 51, 51, 50, 50, 48, 47, 45, 44, 43, 41, 41, 74, 66, 58, 54,
+        50, 51, 53, 52, 52, 51, 49, 48, 46, 45, 43, 43, 62, 58, 54, 51, 48, 49,
+        50, 50, 49, 48, 47, 46, 45, 44, 42, 42, 50, 50, 50, 48, 46, 46, 47, 47,
+        47, 46, 45, 45, 44, 43, 41, 41, 50, 51, 51, 49, 46, 46, 45, 45, 45, 44,
+        44, 43, 42, 41, 40, 40, 49, 51, 53, 50, 47, 45, 44, 43, 43, 42, 42, 41,
+        41, 40, 39, 39, 48, 50, 52, 50, 47, 45, 43, 42, 42, 41, 41, 40, 39, 39,
+        38, 38, 47, 50, 52, 49, 47, 45, 43, 42, 40, 40, 39, 39, 38, 38, 37, 37,
+        46, 48, 51, 48, 46, 44, 42, 41, 40, 39, 38, 38, 37, 37, 37, 37, 44, 47,
+        49, 47, 45, 44, 42, 41, 39, 38, 38, 37, 37, 36, 36, 36, 43, 45, 48, 46,
+        45, 43, 41, 40, 39, 38, 37, 37, 36, 36, 35, 35, 42, 44, 46, 45, 44, 42,
+        41, 39, 38, 37, 37, 36, 35, 35, 35, 35, 41, 43, 45, 44, 43, 41, 40, 39,
+        38, 37, 36, 36, 35, 35, 34, 34, 39, 41, 43, 42, 41, 40, 39, 38, 37, 37,
+        36, 35, 35, 34, 34, 34, 39, 41, 43, 42, 41, 40, 39, 38, 37, 37, 36, 35,
+        35, 34, 34, 34,
+        /* Size 32 */
+        64, 67, 69, 72, 74, 68, 62, 56, 50, 50, 50, 49, 49, 48, 48, 48, 47, 46,
+        46, 45, 44, 44, 43, 42, 42, 41, 41, 40, 39, 39, 39, 39, 67, 67, 68, 69,
+        70, 65, 60, 55, 50, 50, 50, 50, 50, 49, 49, 49, 48, 48, 47, 46, 46, 45,
+        44, 44, 43, 42, 42, 41, 40, 40, 40, 40, 69, 68, 68, 67, 66, 62, 58, 54,
+        50, 50, 51, 51, 51, 51, 50, 50, 50, 49, 48, 48, 47, 46, 45, 45, 44, 43,
+        43, 42, 41, 41, 41, 41, 72, 69, 67, 64, 62, 59, 56, 53, 50, 51, 51, 51,
+        52, 52, 51, 51, 51, 50, 49, 49, 48, 47, 47, 46, 45, 44, 44, 43, 42, 42,
+        42, 42, 74, 70, 66, 62, 58, 56, 54, 52, 50, 51, 51, 52, 53, 53, 52, 52,
+        52, 51, 51, 50, 49, 48, 48, 47, 46, 45, 45, 44, 43, 43, 43, 43, 68, 65,
+        62, 59, 56, 54, 52, 51, 49, 50, 50, 51, 51, 51, 51, 51, 51, 50, 50, 49,
+        48, 48, 47, 46, 45, 45, 44, 43, 43, 43, 43, 43, 62, 60, 58, 56, 54, 52,
+        51, 49, 48, 48, 49, 49, 50, 50, 50, 49, 49, 49, 48, 48, 47, 47, 46, 45,
+        45, 44, 44, 43, 42, 42, 42, 42, 56, 55, 54, 53, 52, 51, 49, 48, 47, 47,
+        48, 48, 48, 48, 48, 48, 48, 48, 47, 47, 46, 46, 45, 45, 44, 44, 43, 42,
+        42, 42, 42, 42, 50, 50, 50, 50, 50, 49, 48, 47, 46, 46, 46, 46, 47, 47,
+        47, 47, 47, 46, 46, 46, 45, 45, 45, 44, 44, 43, 43, 42, 41, 41, 41, 41,
+        50, 50, 50, 51, 51, 50, 48, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 45,
+        45, 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 41, 41, 41, 50, 50, 51, 51,
+        51, 50, 49, 48, 46, 46, 46, 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, 43,
+        43, 42, 42, 42, 41, 41, 40, 40, 40, 40, 49, 50, 51, 51, 52, 51, 49, 48,
+        46, 46, 45, 45, 44, 44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42, 41, 41,
+        41, 40, 40, 40, 40, 40, 49, 50, 51, 52, 53, 51, 50, 48, 47, 46, 45, 44,
+        44, 43, 43, 43, 43, 42, 42, 42, 42, 42, 41, 41, 41, 40, 40, 40, 39, 39,
+        39, 39, 48, 49, 51, 52, 53, 51, 50, 48, 47, 46, 45, 44, 43, 43, 43, 42,
+        42, 42, 42, 41, 41, 41, 41, 40, 40, 40, 39, 39, 39, 39, 39, 39, 48, 49,
+        50, 51, 52, 51, 50, 48, 47, 46, 45, 44, 43, 43, 42, 42, 42, 41, 41, 41,
+        41, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 48, 49, 50, 51, 52, 51,
+        49, 48, 47, 46, 45, 44, 43, 42, 42, 42, 41, 41, 40, 40, 40, 40, 39, 39,
+        39, 39, 38, 38, 38, 38, 38, 38, 47, 48, 50, 51, 52, 51, 49, 48, 47, 46,
+        45, 44, 43, 42, 42, 41, 40, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38,
+        37, 37, 37, 37, 46, 48, 49, 50, 51, 50, 49, 48, 46, 45, 44, 43, 42, 42,
+        41, 41, 40, 40, 40, 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 37, 37, 37,
+        46, 47, 48, 49, 51, 50, 48, 47, 46, 45, 44, 43, 42, 42, 41, 40, 40, 40,
+        39, 39, 38, 38, 38, 38, 37, 37, 37, 37, 37, 37, 37, 37, 45, 46, 48, 49,
+        50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38,
+        38, 37, 37, 37, 37, 36, 36, 36, 36, 36, 44, 46, 47, 48, 49, 48, 47, 46,
+        45, 45, 44, 43, 42, 41, 41, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37, 36,
+        36, 36, 36, 36, 36, 36, 44, 45, 46, 47, 48, 48, 47, 46, 45, 44, 43, 42,
+        42, 41, 40, 40, 39, 39, 38, 38, 37, 37, 37, 37, 36, 36, 36, 36, 35, 35,
+        35, 35, 43, 44, 45, 47, 48, 47, 46, 45, 45, 44, 43, 42, 41, 41, 40, 39,
+        39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 35, 42, 44,
+        45, 46, 47, 46, 45, 45, 44, 43, 42, 42, 41, 40, 40, 39, 39, 38, 38, 37,
+        37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 35, 35, 42, 43, 44, 45, 46, 45,
+        45, 44, 44, 43, 42, 41, 41, 40, 39, 39, 38, 38, 37, 37, 37, 36, 36, 36,
+        35, 35, 35, 35, 35, 35, 35, 35, 41, 42, 43, 44, 45, 45, 44, 44, 43, 42,
+        42, 41, 40, 40, 39, 39, 38, 38, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35,
+        34, 34, 34, 34, 41, 42, 43, 44, 45, 44, 44, 43, 43, 42, 41, 41, 40, 39,
+        39, 38, 38, 37, 37, 37, 36, 36, 36, 35, 35, 35, 35, 34, 34, 34, 34, 34,
+        40, 41, 42, 43, 44, 43, 43, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38, 37,
+        37, 36, 36, 36, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34, 39, 40, 41, 42,
+        43, 43, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38, 37, 37, 37, 36, 36, 35,
+        35, 35, 35, 34, 34, 34, 34, 34, 34, 34, 39, 40, 41, 42, 43, 43, 42, 42,
+        41, 41, 40, 40, 39, 39, 38, 38, 37, 37, 37, 36, 36, 35, 35, 35, 35, 34,
+        34, 34, 34, 34, 34, 34, 39, 40, 41, 42, 43, 43, 42, 42, 41, 41, 40, 40,
+        39, 39, 38, 38, 37, 37, 37, 36, 36, 35, 35, 35, 35, 34, 34, 34, 34, 34,
+        34, 34, 39, 40, 41, 42, 43, 43, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38,
+        37, 37, 37, 36, 36, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34, 34 },
+      { /* Intra matrices */
+        /* Size 4 */
+        103, 76, 72, 63, 76, 68, 66, 61, 72, 66, 58, 54, 63, 61, 54, 51,
+        /* Size 8 */
+        98, 114, 76, 73, 71, 66, 62, 58, 114, 88, 75, 80, 78, 74, 69, 64, 76,
+        75, 69, 70, 70, 68, 65, 62, 73, 80, 70, 65, 64, 62, 60, 58, 71, 78, 70,
+        64, 60, 58, 57, 55, 66, 74, 68, 62, 58, 56, 54, 53, 62, 69, 65, 60, 57,
+        54, 52, 51, 58, 64, 62, 58, 55, 53, 51, 49,
+        /* Size 16 */
+        99, 107, 115, 96, 77, 76, 74, 73, 72, 69, 67, 65, 63, 61, 59, 59, 107,
+        105, 102, 90, 77, 77, 78, 77, 76, 73, 71, 69, 67, 64, 62, 62, 115, 102,
+        89, 83, 76, 79, 81, 80, 80, 77, 75, 73, 70, 68, 65, 65, 96, 90, 83, 78,
+        73, 74, 76, 76, 75, 74, 72, 70, 68, 66, 64, 64, 77, 77, 76, 73, 70, 70,
+        71, 71, 71, 70, 69, 68, 66, 64, 63, 63, 76, 77, 79, 74, 70, 69, 69, 68,
+        68, 67, 66, 65, 64, 62, 61, 61, 74, 78, 81, 76, 71, 69, 66, 65, 65, 64,
+        63, 62, 61, 60, 59, 59, 73, 77, 80, 76, 71, 68, 65, 64, 63, 62, 61, 60,
+        59, 58, 57, 57, 72, 76, 80, 75, 71, 68, 65, 63, 61, 60, 59, 58, 57, 57,
+        56, 56, 69, 73, 77, 74, 70, 67, 64, 62, 60, 59, 58, 57, 56, 55, 55, 55,
+        67, 71, 75, 72, 69, 66, 63, 61, 59, 58, 56, 56, 55, 54, 53, 53, 65, 69,
+        73, 70, 68, 65, 62, 60, 58, 57, 56, 55, 54, 53, 52, 52, 63, 67, 70, 68,
+        66, 64, 61, 59, 57, 56, 55, 54, 53, 52, 51, 51, 61, 64, 68, 66, 64, 62,
+        60, 58, 57, 55, 54, 53, 52, 51, 51, 51, 59, 62, 65, 64, 63, 61, 59, 57,
+        56, 55, 53, 52, 51, 51, 50, 50, 59, 62, 65, 64, 63, 61, 59, 57, 56, 55,
+        53, 52, 51, 51, 50, 50,
+        /* Size 32 */
+        100, 104, 108, 112, 116, 107, 97, 87, 78, 77, 76, 76, 75, 74, 74, 73,
+        72, 71, 70, 69, 68, 67, 66, 65, 63, 63, 62, 61, 60, 60, 60, 60, 104,
+        105, 107, 108, 110, 102, 94, 86, 78, 77, 77, 77, 77, 76, 75, 75, 74, 73,
+        72, 71, 70, 69, 68, 66, 65, 64, 63, 62, 61, 61, 61, 61, 108, 107, 106,
+        104, 103, 97, 90, 84, 77, 78, 78, 78, 78, 78, 77, 77, 76, 75, 74, 73,
+        72, 71, 69, 68, 67, 66, 65, 64, 63, 63, 63, 63, 112, 108, 104, 100, 96,
+        92, 87, 82, 77, 78, 79, 79, 80, 80, 79, 79, 78, 77, 76, 75, 74, 73, 71,
+        70, 69, 68, 67, 65, 64, 64, 64, 64, 116, 110, 103, 96, 90, 87, 83, 80,
+        77, 78, 79, 81, 82, 81, 81, 81, 80, 79, 78, 77, 76, 74, 73, 72, 71, 69,
+        68, 67, 66, 66, 66, 66, 107, 102, 97, 92, 87, 84, 81, 78, 75, 76, 77,
+        78, 79, 79, 79, 78, 78, 77, 76, 75, 74, 73, 72, 71, 70, 68, 67, 66, 65,
+        65, 65, 65, 97, 94, 90, 87, 83, 81, 78, 76, 74, 74, 75, 76, 77, 76, 76,
+        76, 76, 75, 74, 73, 73, 72, 71, 70, 69, 68, 67, 65, 64, 64, 64, 64, 87,
+        86, 84, 82, 80, 78, 76, 74, 72, 72, 73, 73, 74, 74, 74, 74, 74, 73, 72,
+        72, 71, 70, 69, 68, 67, 67, 66, 65, 64, 64, 64, 64, 78, 78, 77, 77, 77,
+        75, 74, 72, 70, 70, 71, 71, 72, 72, 72, 72, 72, 71, 71, 70, 70, 69, 68,
+        67, 66, 66, 65, 64, 63, 63, 63, 63, 77, 77, 78, 78, 78, 76, 74, 72, 70,
+        70, 70, 70, 70, 70, 70, 70, 70, 70, 69, 69, 68, 67, 67, 66, 65, 64, 64,
+        63, 62, 62, 62, 62, 76, 77, 78, 79, 79, 77, 75, 73, 71, 70, 70, 70, 69,
+        69, 69, 69, 68, 68, 67, 67, 67, 66, 65, 65, 64, 63, 63, 62, 61, 61, 61,
+        61, 76, 77, 78, 79, 81, 78, 76, 73, 71, 70, 70, 69, 68, 68, 67, 67, 67,
+        66, 66, 66, 65, 65, 64, 63, 63, 62, 62, 61, 60, 60, 60, 60, 75, 77, 78,
+        80, 82, 79, 77, 74, 72, 70, 69, 68, 67, 66, 66, 65, 65, 65, 64, 64, 64,
+        63, 63, 62, 62, 61, 61, 60, 60, 60, 60, 60, 74, 76, 78, 80, 81, 79, 76,
+        74, 72, 70, 69, 68, 66, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 61, 61,
+        60, 60, 59, 59, 59, 59, 59, 74, 75, 77, 79, 81, 79, 76, 74, 72, 70, 69,
+        67, 66, 65, 65, 64, 63, 63, 62, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58,
+        58, 58, 58, 73, 75, 77, 79, 81, 78, 76, 74, 72, 70, 69, 67, 65, 65, 64,
+        63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 72,
+        74, 76, 78, 80, 78, 76, 74, 72, 70, 68, 67, 65, 64, 63, 62, 61, 61, 60,
+        60, 59, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 56, 71, 73, 75, 77, 79,
+        77, 75, 73, 71, 70, 68, 66, 65, 64, 63, 62, 61, 60, 60, 59, 59, 58, 58,
+        58, 57, 57, 56, 56, 56, 56, 56, 56, 70, 72, 74, 76, 78, 76, 74, 72, 71,
+        69, 67, 66, 64, 63, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 56, 56, 56,
+        55, 55, 55, 55, 55, 69, 71, 73, 75, 77, 75, 73, 72, 70, 69, 67, 66, 64,
+        63, 62, 61, 60, 59, 59, 58, 57, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54,
+        54, 68, 70, 72, 74, 76, 74, 73, 71, 70, 68, 67, 65, 64, 63, 62, 60, 59,
+        59, 58, 57, 57, 56, 56, 56, 55, 55, 54, 54, 54, 54, 54, 54, 67, 69, 71,
+        73, 74, 73, 72, 70, 69, 67, 66, 65, 63, 62, 61, 60, 59, 58, 58, 57, 56,
+        56, 56, 55, 55, 54, 54, 54, 53, 53, 53, 53, 66, 68, 69, 71, 73, 72, 71,
+        69, 68, 67, 65, 64, 63, 62, 61, 60, 59, 58, 57, 57, 56, 56, 55, 55, 54,
+        54, 53, 53, 53, 53, 53, 53, 65, 66, 68, 70, 72, 71, 70, 68, 67, 66, 65,
+        63, 62, 61, 60, 59, 58, 58, 57, 56, 56, 55, 55, 54, 54, 53, 53, 53, 52,
+        52, 52, 52, 63, 65, 67, 69, 71, 70, 69, 67, 66, 65, 64, 63, 62, 61, 60,
+        59, 58, 57, 56, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 63,
+        64, 66, 68, 69, 68, 68, 67, 66, 64, 63, 62, 61, 60, 59, 58, 57, 57, 56,
+        55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 52, 62, 63, 65, 67, 68,
+        67, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 56, 55, 54, 54, 53,
+        53, 53, 52, 52, 52, 51, 51, 51, 51, 61, 62, 64, 65, 67, 66, 65, 65, 64,
+        63, 62, 61, 60, 59, 58, 58, 57, 56, 55, 55, 54, 54, 53, 53, 52, 52, 52,
+        51, 51, 51, 51, 51, 60, 61, 63, 64, 66, 65, 64, 64, 63, 62, 61, 60, 60,
+        59, 58, 57, 56, 56, 55, 54, 54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51,
+        51, 60, 61, 63, 64, 66, 65, 64, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56,
+        56, 55, 54, 54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 51, 60, 61, 63,
+        64, 66, 65, 64, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 56, 55, 54, 54,
+        53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 51, 60, 61, 63, 64, 66, 65, 64,
+        64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 56, 55, 54, 54, 53, 53, 52, 52,
+        52, 51, 51, 51, 51, 51, 51 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 61, 47, 39, 61, 49, 42, 38, 47, 42, 38, 35, 39, 38, 35, 34,
+        /* Size 8 */
+        64, 76, 72, 62, 53, 47, 44, 41, 76, 70, 72, 65, 57, 51, 46, 43, 72, 72,
+        59, 55, 51, 47, 44, 42, 62, 65, 55, 49, 46, 44, 42, 41, 53, 57, 51, 46,
+        43, 42, 41, 40, 47, 51, 47, 44, 42, 40, 39, 39, 44, 46, 44, 42, 41, 39,
+        39, 38, 41, 43, 42, 41, 40, 39, 38, 37,
+        /* Size 16 */
+        64, 70, 76, 74, 72, 67, 62, 57, 53, 50, 47, 46, 44, 43, 41, 41, 70, 71,
+        73, 72, 72, 68, 63, 59, 55, 52, 49, 47, 45, 44, 42, 42, 76, 73, 70, 71,
+        72, 68, 65, 61, 57, 54, 51, 49, 46, 45, 43, 43, 74, 72, 71, 68, 65, 63,
+        60, 57, 54, 52, 49, 47, 45, 44, 43, 43, 72, 72, 72, 65, 59, 57, 55, 53,
+        51, 49, 47, 46, 44, 43, 42, 42, 67, 68, 68, 63, 57, 54, 52, 50, 49, 47,
+        46, 44, 43, 42, 41, 41, 62, 63, 65, 60, 55, 52, 49, 48, 46, 45, 44, 43,
+        42, 41, 41, 41, 57, 59, 61, 57, 53, 50, 48, 46, 45, 44, 43, 42, 41, 41,
+        40, 40, 53, 55, 57, 54, 51, 49, 46, 45, 43, 43, 42, 41, 41, 40, 40, 40,
+        50, 52, 54, 52, 49, 47, 45, 44, 43, 42, 41, 40, 40, 40, 39, 39, 47, 49,
+        51, 49, 47, 46, 44, 43, 42, 41, 40, 40, 39, 39, 39, 39, 46, 47, 49, 47,
+        46, 44, 43, 42, 41, 40, 40, 39, 39, 39, 38, 38, 44, 45, 46, 45, 44, 43,
+        42, 41, 41, 40, 39, 39, 39, 38, 38, 38, 43, 44, 45, 44, 43, 42, 41, 41,
+        40, 40, 39, 39, 38, 38, 38, 38, 41, 42, 43, 43, 42, 41, 41, 40, 40, 39,
+        39, 38, 38, 38, 37, 37, 41, 42, 43, 43, 42, 41, 41, 40, 40, 39, 39, 38,
+        38, 38, 37, 37,
+        /* Size 32 */
+        64, 67, 70, 73, 76, 75, 74, 73, 72, 70, 67, 64, 62, 60, 57, 55, 53, 52,
+        50, 49, 47, 47, 46, 45, 44, 43, 43, 42, 41, 41, 41, 41, 67, 69, 71, 72,
+        74, 74, 73, 73, 72, 70, 67, 65, 63, 60, 58, 56, 54, 53, 51, 50, 48, 47,
+        46, 45, 44, 44, 43, 42, 42, 42, 42, 42, 70, 71, 71, 72, 73, 73, 72, 72,
+        72, 70, 68, 66, 63, 61, 59, 57, 55, 54, 52, 51, 49, 48, 47, 46, 45, 44,
+        44, 43, 42, 42, 42, 42, 73, 72, 72, 72, 71, 71, 71, 72, 72, 70, 68, 66,
+        64, 62, 60, 58, 56, 55, 53, 52, 50, 49, 48, 47, 46, 45, 44, 43, 43, 43,
+        43, 43, 76, 74, 73, 71, 70, 70, 71, 71, 72, 70, 68, 67, 65, 63, 61, 59,
+        57, 56, 54, 52, 51, 50, 49, 47, 46, 46, 45, 44, 43, 43, 43, 43, 75, 74,
+        73, 71, 70, 70, 69, 69, 68, 67, 66, 64, 63, 61, 59, 57, 56, 54, 53, 51,
+        50, 49, 48, 47, 46, 45, 44, 44, 43, 43, 43, 43, 74, 73, 72, 71, 71, 69,
+        68, 67, 65, 64, 63, 61, 60, 58, 57, 56, 54, 53, 52, 50, 49, 48, 47, 46,
+        45, 45, 44, 43, 43, 43, 43, 43, 73, 73, 72, 72, 71, 69, 67, 64, 62, 61,
+        60, 59, 57, 56, 55, 54, 52, 51, 50, 49, 48, 47, 47, 46, 45, 44, 44, 43,
+        42, 42, 42, 42, 72, 72, 72, 72, 72, 68, 65, 62, 59, 58, 57, 56, 55, 54,
+        53, 52, 51, 50, 49, 48, 47, 47, 46, 45, 44, 44, 43, 43, 42, 42, 42, 42,
+        70, 70, 70, 70, 70, 67, 64, 61, 58, 57, 56, 55, 53, 52, 52, 51, 50, 49,
+        48, 47, 47, 46, 45, 45, 44, 43, 43, 42, 42, 42, 42, 42, 67, 67, 68, 68,
+        68, 66, 63, 60, 57, 56, 54, 53, 52, 51, 50, 49, 49, 48, 47, 46, 46, 45,
+        44, 44, 43, 43, 42, 42, 41, 41, 41, 41, 64, 65, 66, 66, 67, 64, 61, 59,
+        56, 55, 53, 52, 51, 50, 49, 48, 47, 47, 46, 45, 45, 44, 44, 43, 43, 42,
+        42, 42, 41, 41, 41, 41, 62, 63, 63, 64, 65, 63, 60, 57, 55, 53, 52, 51,
+        49, 48, 48, 47, 46, 46, 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 41, 41,
+        41, 41, 60, 60, 61, 62, 63, 61, 58, 56, 54, 52, 51, 50, 48, 48, 47, 46,
+        46, 45, 44, 44, 43, 43, 43, 42, 42, 41, 41, 41, 40, 40, 40, 40, 57, 58,
+        59, 60, 61, 59, 57, 55, 53, 52, 50, 49, 48, 47, 46, 46, 45, 44, 44, 43,
+        43, 43, 42, 42, 41, 41, 41, 40, 40, 40, 40, 40, 55, 56, 57, 58, 59, 57,
+        56, 54, 52, 51, 49, 48, 47, 46, 46, 45, 44, 44, 43, 43, 42, 42, 42, 41,
+        41, 41, 40, 40, 40, 40, 40, 40, 53, 54, 55, 56, 57, 56, 54, 52, 51, 50,
+        49, 47, 46, 46, 45, 44, 43, 43, 43, 42, 42, 41, 41, 41, 41, 40, 40, 40,
+        40, 40, 40, 40, 52, 53, 54, 55, 56, 54, 53, 51, 50, 49, 48, 47, 46, 45,
+        44, 44, 43, 43, 42, 42, 41, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39,
+        50, 51, 52, 53, 54, 53, 52, 50, 49, 48, 47, 46, 45, 44, 44, 43, 43, 42,
+        42, 41, 41, 41, 40, 40, 40, 40, 40, 39, 39, 39, 39, 39, 49, 50, 51, 52,
+        52, 51, 50, 49, 48, 47, 46, 45, 45, 44, 43, 43, 42, 42, 41, 41, 41, 40,
+        40, 40, 40, 39, 39, 39, 39, 39, 39, 39, 47, 48, 49, 50, 51, 50, 49, 48,
+        47, 47, 46, 45, 44, 43, 43, 42, 42, 41, 41, 41, 40, 40, 40, 40, 39, 39,
+        39, 39, 39, 39, 39, 39, 47, 47, 48, 49, 50, 49, 48, 47, 47, 46, 45, 44,
+        44, 43, 43, 42, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 39, 38, 38,
+        38, 38, 46, 46, 47, 48, 49, 48, 47, 47, 46, 45, 44, 44, 43, 43, 42, 42,
+        41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 39, 38, 38, 38, 38, 38, 45, 45,
+        46, 47, 47, 47, 46, 46, 45, 45, 44, 43, 43, 42, 42, 41, 41, 41, 40, 40,
+        40, 39, 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 44, 44, 45, 46, 46, 46,
+        45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 41, 40, 40, 40, 39, 39, 39, 39,
+        39, 38, 38, 38, 38, 38, 38, 38, 43, 44, 44, 45, 46, 45, 45, 44, 44, 43,
+        43, 42, 42, 41, 41, 41, 40, 40, 40, 39, 39, 39, 39, 39, 38, 38, 38, 38,
+        38, 38, 38, 38, 43, 43, 44, 44, 45, 44, 44, 44, 43, 43, 42, 42, 41, 41,
+        41, 40, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+        42, 42, 43, 43, 44, 44, 43, 43, 43, 42, 42, 42, 41, 41, 40, 40, 40, 40,
+        39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 41, 42, 42, 43,
+        43, 43, 43, 42, 42, 42, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 38,
+        38, 38, 38, 38, 38, 38, 37, 37, 37, 37, 41, 42, 42, 43, 43, 43, 43, 42,
+        42, 42, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38,
+        38, 38, 37, 37, 37, 37, 41, 42, 42, 43, 43, 43, 43, 42, 42, 42, 41, 41,
+        41, 40, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 38, 37, 37,
+        37, 37, 41, 42, 42, 43, 43, 43, 43, 42, 42, 42, 41, 41, 41, 40, 40, 40,
+        40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 38, 37, 37, 37, 37 },
+      { /* Intra matrices */
+        /* Size 4 */
+        100, 95, 72, 60, 95, 76, 65, 58, 72, 65, 57, 54, 60, 58, 54, 51,
+        /* Size 8 */
+        90, 107, 102, 86, 74, 65, 60, 56, 107, 98, 101, 92, 80, 70, 64, 59, 102,
+        101, 83, 76, 71, 65, 61, 58, 86, 92, 76, 68, 64, 60, 58, 56, 74, 80, 71,
+        64, 60, 57, 55, 54, 65, 70, 65, 60, 57, 55, 53, 52, 60, 64, 61, 58, 55,
+        53, 52, 51, 56, 59, 58, 56, 54, 52, 51, 51,
+        /* Size 16 */
+        92, 100, 109, 107, 104, 96, 88, 82, 75, 71, 67, 64, 61, 59, 57, 57, 100,
+        102, 105, 104, 103, 97, 91, 84, 78, 74, 69, 66, 63, 61, 59, 59, 109,
+        105, 100, 101, 103, 98, 93, 87, 81, 76, 72, 68, 65, 63, 60, 60, 107,
+        104, 101, 97, 94, 89, 85, 81, 77, 73, 69, 66, 63, 61, 59, 59, 104, 103,
+        103, 94, 84, 81, 78, 75, 72, 69, 66, 64, 62, 60, 59, 59, 96, 97, 98, 89,
+        81, 77, 73, 71, 68, 66, 64, 62, 60, 59, 58, 58, 88, 91, 93, 85, 78, 73,
+        69, 67, 65, 63, 62, 60, 59, 58, 57, 57, 82, 84, 87, 81, 75, 71, 67, 65,
+        63, 61, 60, 59, 58, 57, 56, 56, 75, 78, 81, 77, 72, 68, 65, 63, 61, 59,
+        58, 57, 56, 55, 55, 55, 71, 74, 76, 73, 69, 66, 63, 61, 59, 58, 57, 56,
+        55, 55, 54, 54, 67, 69, 72, 69, 66, 64, 62, 60, 58, 57, 56, 55, 54, 54,
+        53, 53, 64, 66, 68, 66, 64, 62, 60, 59, 57, 56, 55, 54, 54, 53, 53, 53,
+        61, 63, 65, 63, 62, 60, 59, 58, 56, 55, 54, 54, 53, 53, 52, 52, 59, 61,
+        63, 61, 60, 59, 58, 57, 55, 55, 54, 53, 53, 52, 52, 52, 57, 59, 60, 59,
+        59, 58, 57, 56, 55, 54, 53, 53, 52, 52, 52, 52, 57, 59, 60, 59, 59, 58,
+        57, 56, 55, 54, 53, 53, 52, 52, 52, 52,
+        /* Size 32 */
+        92, 97, 101, 106, 110, 109, 107, 106, 105, 101, 97, 93, 89, 86, 82, 79,
+        76, 74, 72, 69, 67, 66, 64, 63, 62, 61, 60, 59, 58, 58, 58, 58, 97, 100,
+        102, 105, 108, 107, 106, 105, 104, 101, 97, 94, 90, 87, 84, 81, 77, 75,
+        73, 71, 69, 67, 66, 64, 63, 62, 61, 60, 59, 59, 59, 59, 101, 102, 103,
+        105, 106, 105, 105, 105, 104, 101, 98, 95, 91, 88, 85, 82, 79, 77, 74,
+        72, 70, 68, 67, 65, 64, 63, 61, 60, 59, 59, 59, 59, 106, 105, 105, 104,
+        103, 103, 104, 104, 104, 101, 98, 96, 93, 90, 87, 84, 80, 78, 76, 73,
+        71, 69, 68, 66, 65, 63, 62, 61, 60, 60, 60, 60, 110, 108, 106, 103, 101,
+        102, 102, 103, 104, 101, 99, 97, 94, 91, 88, 85, 82, 80, 77, 75, 72, 71,
+        69, 67, 66, 64, 63, 62, 61, 61, 61, 61, 109, 107, 105, 103, 102, 101,
+        100, 100, 99, 97, 95, 92, 90, 88, 85, 82, 80, 77, 75, 73, 71, 69, 68,
+        66, 65, 64, 63, 61, 60, 60, 60, 60, 107, 106, 105, 104, 102, 100, 98,
+        96, 94, 92, 90, 88, 86, 84, 82, 79, 77, 75, 73, 72, 70, 68, 67, 65, 64,
+        63, 62, 61, 60, 60, 60, 60, 106, 105, 105, 104, 103, 100, 96, 93, 90,
+        88, 86, 84, 82, 80, 79, 77, 75, 73, 72, 70, 68, 67, 66, 65, 63, 62, 61,
+        60, 60, 60, 60, 60, 105, 104, 104, 104, 104, 99, 94, 90, 85, 83, 82, 80,
+        78, 77, 75, 74, 73, 71, 70, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 59,
+        59, 59, 101, 101, 101, 101, 101, 97, 92, 88, 83, 82, 80, 78, 76, 75, 73,
+        72, 71, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 59, 59, 59, 59, 97,
+        97, 98, 98, 99, 95, 90, 86, 82, 80, 78, 76, 74, 73, 71, 70, 69, 68, 67,
+        66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 58, 58, 58, 93, 94, 95, 96, 97,
+        92, 88, 84, 80, 78, 76, 74, 72, 71, 70, 68, 67, 66, 65, 64, 63, 63, 62,
+        61, 60, 59, 59, 58, 58, 58, 58, 58, 89, 90, 91, 93, 94, 90, 86, 82, 78,
+        76, 74, 72, 70, 69, 68, 66, 65, 65, 64, 63, 62, 61, 61, 60, 59, 59, 58,
+        58, 57, 57, 57, 57, 86, 87, 88, 90, 91, 88, 84, 80, 77, 75, 73, 71, 69,
+        68, 66, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 58, 57, 57, 57, 57,
+        57, 82, 84, 85, 87, 88, 85, 82, 79, 75, 73, 71, 70, 68, 66, 65, 64, 63,
+        63, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 56, 56, 56, 56, 79, 81, 82,
+        84, 85, 82, 79, 77, 74, 72, 70, 68, 66, 65, 64, 63, 62, 62, 61, 60, 59,
+        59, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 76, 77, 79, 80, 82, 80, 77,
+        75, 73, 71, 69, 67, 65, 64, 63, 62, 61, 61, 60, 59, 59, 58, 58, 57, 57,
+        56, 56, 56, 55, 55, 55, 55, 74, 75, 77, 78, 80, 77, 75, 73, 71, 69, 68,
+        66, 65, 64, 63, 62, 61, 60, 59, 59, 58, 58, 57, 57, 56, 56, 56, 55, 55,
+        55, 55, 55, 72, 73, 74, 76, 77, 75, 73, 72, 70, 68, 67, 65, 64, 63, 62,
+        61, 60, 59, 59, 58, 57, 57, 57, 56, 56, 55, 55, 55, 55, 55, 55, 55, 69,
+        71, 72, 73, 75, 73, 72, 70, 68, 67, 66, 64, 63, 62, 61, 60, 59, 59, 58,
+        58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 54, 54, 67, 69, 70, 71, 72,
+        71, 70, 68, 67, 66, 65, 63, 62, 61, 60, 59, 59, 58, 57, 57, 56, 56, 56,
+        55, 55, 55, 54, 54, 54, 54, 54, 54, 66, 67, 68, 69, 71, 69, 68, 67, 66,
+        65, 64, 63, 61, 61, 60, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54,
+        54, 54, 54, 54, 54, 64, 66, 67, 68, 69, 68, 67, 66, 65, 64, 63, 62, 61,
+        60, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 54, 53, 53, 53,
+        53, 63, 64, 65, 66, 67, 66, 65, 65, 64, 63, 62, 61, 60, 59, 59, 58, 57,
+        57, 56, 56, 55, 55, 55, 54, 54, 54, 54, 53, 53, 53, 53, 53, 62, 63, 64,
+        65, 66, 65, 64, 63, 63, 62, 61, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55,
+        55, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53, 61, 62, 63, 63, 64, 64, 63,
+        62, 62, 61, 60, 59, 59, 58, 58, 57, 56, 56, 55, 55, 55, 54, 54, 54, 53,
+        53, 53, 53, 53, 53, 53, 53, 60, 61, 61, 62, 63, 63, 62, 61, 61, 60, 60,
+        59, 58, 58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 54, 53, 53, 53, 53, 52,
+        52, 52, 52, 59, 60, 60, 61, 62, 61, 61, 60, 60, 59, 59, 58, 58, 57, 57,
+        56, 56, 55, 55, 54, 54, 54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 52, 58,
+        59, 59, 60, 61, 60, 60, 60, 59, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55,
+        54, 54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 58, 59, 59, 60, 61,
+        60, 60, 60, 59, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 53,
+        53, 53, 53, 52, 52, 52, 52, 52, 52, 58, 59, 59, 60, 61, 60, 60, 60, 59,
+        59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 53, 52,
+        52, 52, 52, 52, 52, 58, 59, 59, 60, 61, 60, 60, 60, 59, 59, 58, 58, 57,
+        57, 56, 56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 52,
+        52 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 50, 48, 43, 50, 45, 44, 42, 48, 44, 40, 38, 43, 42, 38, 36,
+        /* Size 8 */
+        64, 73, 52, 50, 49, 47, 44, 42, 73, 59, 51, 54, 53, 51, 48, 45, 52, 51,
+        48, 49, 49, 47, 46, 44, 50, 54, 49, 46, 45, 44, 43, 42, 49, 53, 49, 45,
+        43, 42, 41, 40, 47, 51, 47, 44, 42, 40, 40, 39, 44, 48, 46, 43, 41, 40,
+        39, 38, 42, 45, 44, 42, 40, 39, 38, 37,
+        /* Size 16 */
+        64, 69, 73, 63, 52, 51, 50, 50, 49, 48, 47, 45, 44, 43, 42, 42, 69, 67,
+        66, 59, 52, 52, 52, 52, 51, 50, 49, 47, 46, 45, 44, 44, 73, 66, 59, 55,
+        51, 53, 54, 54, 53, 52, 51, 49, 48, 47, 45, 45, 63, 59, 55, 52, 50, 50,
+        51, 51, 51, 50, 49, 48, 47, 46, 45, 45, 52, 52, 51, 50, 48, 48, 49, 49,
+        49, 48, 47, 47, 46, 45, 44, 44, 51, 52, 53, 50, 48, 48, 47, 47, 47, 46,
+        46, 45, 44, 44, 43, 43, 50, 52, 54, 51, 49, 47, 46, 45, 45, 45, 44, 44,
+        43, 43, 42, 42, 50, 52, 54, 51, 49, 47, 45, 45, 44, 44, 43, 43, 42, 42,
+        41, 41, 49, 51, 53, 51, 49, 47, 45, 44, 43, 42, 42, 41, 41, 41, 40, 40,
+        48, 50, 52, 50, 48, 46, 45, 44, 42, 42, 41, 41, 40, 40, 39, 39, 47, 49,
+        51, 49, 47, 46, 44, 43, 42, 41, 40, 40, 40, 39, 39, 39, 45, 47, 49, 48,
+        47, 45, 44, 43, 41, 41, 40, 40, 39, 39, 38, 38, 44, 46, 48, 47, 46, 44,
+        43, 42, 41, 40, 40, 39, 39, 38, 38, 38, 43, 45, 47, 46, 45, 44, 43, 42,
+        41, 40, 39, 39, 38, 38, 37, 37, 42, 44, 45, 45, 44, 43, 42, 41, 40, 39,
+        39, 38, 38, 37, 37, 37, 42, 44, 45, 45, 44, 43, 42, 41, 40, 39, 39, 38,
+        38, 37, 37, 37,
+        /* Size 32 */
+        64, 66, 69, 71, 73, 68, 63, 57, 52, 52, 51, 51, 50, 50, 50, 49, 49, 48,
+        48, 47, 47, 46, 45, 45, 44, 44, 43, 43, 42, 42, 42, 42, 66, 67, 68, 69,
+        69, 65, 61, 56, 52, 52, 52, 52, 51, 51, 51, 50, 50, 49, 49, 48, 48, 47,
+        46, 46, 45, 45, 44, 43, 43, 43, 43, 43, 69, 68, 67, 67, 66, 62, 59, 55,
+        52, 52, 52, 52, 52, 52, 52, 51, 51, 51, 50, 49, 49, 48, 47, 47, 46, 46,
+        45, 44, 44, 44, 44, 44, 71, 69, 67, 64, 62, 60, 57, 54, 52, 52, 52, 53,
+        53, 53, 53, 52, 52, 52, 51, 50, 50, 49, 48, 48, 47, 46, 46, 45, 45, 45,
+        45, 45, 73, 69, 66, 62, 59, 57, 55, 53, 51, 52, 53, 53, 54, 54, 54, 54,
+        53, 53, 52, 51, 51, 50, 49, 49, 48, 47, 47, 46, 45, 45, 45, 45, 68, 65,
+        62, 60, 57, 55, 54, 52, 51, 51, 52, 52, 53, 53, 52, 52, 52, 52, 51, 51,
+        50, 49, 49, 48, 47, 47, 46, 46, 45, 45, 45, 45, 63, 61, 59, 57, 55, 54,
+        52, 51, 50, 50, 50, 51, 51, 51, 51, 51, 51, 51, 50, 50, 49, 49, 48, 47,
+        47, 46, 46, 45, 45, 45, 45, 45, 57, 56, 55, 54, 53, 52, 51, 50, 49, 49,
+        49, 50, 50, 50, 50, 50, 50, 49, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45,
+        44, 44, 44, 44, 52, 52, 52, 52, 51, 51, 50, 49, 48, 48, 48, 48, 49, 49,
+        49, 49, 49, 48, 48, 48, 47, 47, 47, 46, 46, 45, 45, 44, 44, 44, 44, 44,
+        52, 52, 52, 52, 52, 51, 50, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 47,
+        47, 47, 47, 46, 46, 46, 45, 45, 44, 44, 43, 43, 43, 43, 51, 52, 52, 52,
+        53, 52, 50, 49, 48, 48, 48, 47, 47, 47, 47, 47, 47, 47, 46, 46, 46, 46,
+        45, 45, 44, 44, 44, 43, 43, 43, 43, 43, 51, 52, 52, 53, 53, 52, 51, 50,
+        48, 48, 47, 47, 47, 46, 46, 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 43,
+        43, 43, 42, 42, 42, 42, 50, 51, 52, 53, 54, 53, 51, 50, 49, 48, 47, 47,
+        46, 46, 45, 45, 45, 45, 45, 44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42,
+        42, 42, 50, 51, 52, 53, 54, 53, 51, 50, 49, 48, 47, 46, 46, 45, 45, 45,
+        45, 44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 50, 51,
+        52, 53, 54, 52, 51, 50, 49, 48, 47, 46, 45, 45, 45, 44, 44, 44, 44, 43,
+        43, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 41, 49, 50, 51, 52, 54, 52,
+        51, 50, 49, 48, 47, 46, 45, 45, 44, 44, 44, 43, 43, 43, 42, 42, 42, 42,
+        42, 41, 41, 41, 41, 41, 41, 41, 49, 50, 51, 52, 53, 52, 51, 50, 49, 48,
+        47, 46, 45, 45, 44, 44, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 41, 40,
+        40, 40, 40, 40, 48, 49, 51, 52, 53, 52, 51, 49, 48, 47, 47, 46, 45, 44,
+        44, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 40, 40, 40, 40, 40, 40, 40,
+        48, 49, 50, 51, 52, 51, 50, 49, 48, 47, 46, 45, 45, 44, 44, 43, 42, 42,
+        42, 42, 41, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 47, 48, 49, 50,
+        51, 51, 50, 49, 48, 47, 46, 45, 44, 44, 43, 43, 42, 42, 42, 41, 41, 41,
+        40, 40, 40, 40, 40, 39, 39, 39, 39, 39, 47, 48, 49, 50, 51, 50, 49, 48,
+        47, 47, 46, 45, 44, 44, 43, 42, 42, 42, 41, 41, 40, 40, 40, 40, 40, 39,
+        39, 39, 39, 39, 39, 39, 46, 47, 48, 49, 50, 49, 49, 48, 47, 46, 46, 45,
+        44, 43, 43, 42, 42, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 39, 39,
+        39, 39, 45, 46, 47, 48, 49, 49, 48, 47, 47, 46, 45, 44, 44, 43, 43, 42,
+        41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 45, 46,
+        47, 48, 49, 48, 47, 47, 46, 46, 45, 44, 43, 43, 42, 42, 41, 41, 41, 40,
+        40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 44, 45, 46, 47, 48, 47,
+        47, 46, 46, 45, 44, 44, 43, 43, 42, 42, 41, 41, 40, 40, 40, 39, 39, 39,
+        39, 38, 38, 38, 38, 38, 38, 38, 44, 45, 46, 46, 47, 47, 46, 46, 45, 45,
+        44, 43, 43, 42, 42, 41, 41, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38,
+        38, 38, 38, 38, 43, 44, 45, 46, 47, 46, 46, 45, 45, 44, 44, 43, 43, 42,
+        42, 41, 41, 40, 40, 40, 39, 39, 39, 38, 38, 38, 38, 38, 37, 37, 37, 37,
+        43, 43, 44, 45, 46, 46, 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 40, 40,
+        40, 39, 39, 39, 38, 38, 38, 38, 38, 37, 37, 37, 37, 37, 42, 43, 44, 45,
+        45, 45, 45, 44, 44, 43, 43, 42, 42, 42, 41, 41, 40, 40, 39, 39, 39, 39,
+        38, 38, 38, 38, 37, 37, 37, 37, 37, 37, 42, 43, 44, 45, 45, 45, 45, 44,
+        44, 43, 43, 42, 42, 42, 41, 41, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38,
+        37, 37, 37, 37, 37, 37, 42, 43, 44, 45, 45, 45, 45, 44, 44, 43, 43, 42,
+        42, 42, 41, 41, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 37,
+        37, 37, 42, 43, 44, 45, 45, 45, 45, 44, 44, 43, 43, 42, 42, 42, 41, 41,
+        40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 37, 37, 37 },
+      { /* Intra matrices */
+        /* Size 4 */
+        97, 74, 71, 63, 74, 67, 65, 61, 71, 65, 58, 56, 63, 61, 56, 52,
+        /* Size 8 */
+        92, 106, 74, 72, 69, 66, 62, 59, 106, 84, 73, 77, 76, 72, 68, 64, 74,
+        73, 68, 69, 69, 67, 65, 62, 72, 77, 69, 65, 63, 62, 61, 59, 69, 76, 69,
+        63, 60, 59, 57, 56, 66, 72, 67, 62, 59, 57, 55, 54, 62, 68, 65, 61, 57,
+        55, 54, 53, 59, 64, 62, 59, 56, 54, 53, 51,
+        /* Size 16 */
+        93, 100, 107, 91, 75, 74, 73, 71, 70, 68, 67, 65, 63, 61, 60, 60, 100,
+        98, 96, 85, 74, 75, 75, 74, 74, 72, 70, 68, 66, 64, 62, 62, 107, 96, 85,
+        79, 74, 76, 78, 77, 77, 75, 73, 71, 69, 67, 65, 65, 91, 85, 79, 75, 71,
+        73, 74, 74, 73, 72, 71, 69, 67, 65, 64, 64, 75, 74, 74, 71, 68, 69, 70,
+        70, 70, 69, 68, 67, 65, 64, 63, 63, 74, 75, 76, 73, 69, 68, 68, 67, 67,
+        66, 65, 64, 63, 62, 61, 61, 73, 75, 78, 74, 70, 68, 66, 65, 64, 64, 63,
+        62, 61, 60, 60, 60, 71, 74, 77, 74, 70, 67, 65, 64, 63, 62, 61, 61, 60,
+        59, 58, 58, 70, 74, 77, 73, 70, 67, 64, 63, 61, 60, 59, 59, 58, 58, 57,
+        57, 68, 72, 75, 72, 69, 66, 64, 62, 60, 59, 58, 58, 57, 56, 56, 56, 67,
+        70, 73, 71, 68, 65, 63, 61, 59, 58, 57, 57, 56, 55, 55, 55, 65, 68, 71,
+        69, 67, 64, 62, 61, 59, 58, 57, 56, 55, 55, 54, 54, 63, 66, 69, 67, 65,
+        63, 61, 60, 58, 57, 56, 55, 54, 54, 53, 53, 61, 64, 67, 65, 64, 62, 60,
+        59, 58, 56, 55, 55, 54, 53, 53, 53, 60, 62, 65, 64, 63, 61, 60, 58, 57,
+        56, 55, 54, 53, 53, 52, 52, 60, 62, 65, 64, 63, 61, 60, 58, 57, 56, 55,
+        54, 53, 53, 52, 52,
+        /* Size 32 */
+        94, 97, 101, 104, 108, 100, 91, 83, 75, 75, 74, 74, 73, 72, 72, 71, 71,
+        70, 69, 68, 67, 66, 65, 64, 63, 63, 62, 61, 60, 60, 60, 60, 97, 98, 100,
+        101, 102, 95, 89, 82, 75, 75, 75, 75, 74, 74, 73, 73, 72, 71, 70, 70,
+        69, 68, 67, 66, 65, 64, 63, 62, 61, 61, 61, 61, 101, 100, 99, 98, 97,
+        91, 86, 80, 75, 75, 75, 76, 76, 75, 75, 74, 74, 73, 72, 71, 70, 69, 68,
+        67, 66, 65, 65, 64, 63, 63, 63, 63, 104, 101, 98, 94, 91, 87, 83, 79,
+        75, 75, 76, 77, 77, 77, 76, 76, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67,
+        66, 65, 64, 64, 64, 64, 108, 102, 97, 91, 85, 83, 80, 77, 75, 76, 77,
+        78, 79, 78, 78, 78, 77, 76, 75, 75, 74, 73, 71, 70, 69, 68, 67, 66, 65,
+        65, 65, 65, 100, 95, 91, 87, 83, 80, 78, 76, 73, 74, 75, 76, 76, 76, 76,
+        76, 76, 75, 74, 73, 72, 71, 70, 69, 68, 68, 67, 66, 65, 65, 65, 65, 91,
+        89, 86, 83, 80, 78, 76, 74, 72, 72, 73, 74, 74, 74, 74, 74, 74, 73, 72,
+        72, 71, 70, 69, 68, 68, 67, 66, 65, 64, 64, 64, 64, 83, 82, 80, 79, 77,
+        76, 74, 72, 70, 71, 71, 72, 72, 72, 72, 72, 72, 71, 71, 70, 70, 69, 68,
+        67, 67, 66, 65, 64, 64, 64, 64, 64, 75, 75, 75, 75, 75, 73, 72, 70, 69,
+        69, 69, 70, 70, 70, 70, 70, 70, 70, 69, 69, 68, 68, 67, 66, 66, 65, 64,
+        64, 63, 63, 63, 63, 75, 75, 75, 75, 76, 74, 72, 71, 69, 69, 69, 69, 69,
+        69, 69, 69, 69, 68, 68, 68, 67, 67, 66, 65, 65, 64, 63, 63, 62, 62, 62,
+        62, 74, 75, 75, 76, 77, 75, 73, 71, 69, 69, 69, 68, 68, 68, 68, 68, 67,
+        67, 67, 66, 66, 65, 65, 64, 64, 63, 63, 62, 61, 61, 61, 61, 74, 75, 76,
+        77, 78, 76, 74, 72, 70, 69, 68, 68, 67, 67, 67, 66, 66, 66, 65, 65, 65,
+        64, 64, 63, 63, 62, 62, 61, 61, 61, 61, 61, 73, 74, 76, 77, 79, 76, 74,
+        72, 70, 69, 68, 67, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 62, 62,
+        61, 61, 60, 60, 60, 60, 60, 72, 74, 75, 77, 78, 76, 74, 72, 70, 69, 68,
+        67, 66, 65, 65, 64, 64, 64, 63, 63, 63, 62, 62, 61, 61, 61, 60, 60, 59,
+        59, 59, 59, 72, 73, 75, 76, 78, 76, 74, 72, 70, 69, 68, 67, 65, 65, 64,
+        64, 63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 59, 59, 59, 59, 59, 59, 71,
+        73, 74, 76, 78, 76, 74, 72, 70, 69, 68, 66, 65, 64, 64, 63, 62, 62, 62,
+        61, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 58, 71, 72, 74, 76, 77,
+        76, 74, 72, 70, 69, 67, 66, 65, 64, 63, 62, 62, 61, 61, 60, 60, 60, 59,
+        59, 59, 58, 58, 58, 57, 57, 57, 57, 70, 71, 73, 75, 76, 75, 73, 71, 70,
+        68, 67, 66, 64, 64, 63, 62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58, 57,
+        57, 57, 57, 57, 57, 69, 70, 72, 74, 75, 74, 72, 71, 69, 68, 67, 65, 64,
+        63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56,
+        56, 68, 70, 71, 73, 75, 73, 72, 70, 69, 68, 66, 65, 64, 63, 62, 61, 60,
+        60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 67, 69, 70,
+        72, 74, 72, 71, 70, 68, 67, 66, 65, 63, 63, 62, 61, 60, 59, 59, 58, 58,
+        57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55, 66, 68, 69, 71, 73, 71, 70,
+        69, 68, 67, 65, 64, 63, 62, 61, 60, 60, 59, 58, 58, 57, 57, 57, 56, 56,
+        56, 55, 55, 55, 55, 55, 55, 65, 67, 68, 70, 71, 70, 69, 68, 67, 66, 65,
+        64, 63, 62, 61, 60, 59, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 55, 54,
+        54, 54, 54, 64, 66, 67, 69, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 61,
+        60, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 55, 54, 54, 54, 54, 54, 63,
+        65, 66, 68, 69, 68, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 59, 58, 57,
+        57, 56, 56, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 63, 64, 65, 67, 68,
+        68, 67, 66, 65, 64, 63, 62, 61, 61, 60, 59, 58, 58, 57, 57, 56, 56, 55,
+        55, 54, 54, 54, 54, 53, 53, 53, 53, 62, 63, 65, 66, 67, 67, 66, 65, 64,
+        63, 63, 62, 61, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54,
+        53, 53, 53, 53, 53, 61, 62, 64, 65, 66, 66, 65, 64, 64, 63, 62, 61, 60,
+        60, 59, 58, 58, 57, 56, 56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 53, 53,
+        53, 60, 61, 63, 64, 65, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 57,
+        57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 60, 61, 63,
+        64, 65, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 57, 57, 56, 56, 55,
+        55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 60, 61, 63, 64, 65, 65, 64,
+        64, 63, 62, 61, 61, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55, 54, 54, 54,
+        53, 53, 53, 52, 52, 52, 52, 60, 61, 63, 64, 65, 65, 64, 64, 63, 62, 61,
+        61, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53, 52,
+        52, 52, 52 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 61, 49, 42, 61, 51, 45, 41, 49, 45, 41, 39, 42, 41, 39, 38,
+        /* Size 8 */
+        64, 74, 71, 62, 55, 50, 47, 45, 74, 69, 70, 65, 58, 53, 49, 46, 71, 70,
+        60, 56, 53, 50, 47, 45, 62, 65, 56, 51, 49, 47, 45, 44, 55, 58, 53, 49,
+        46, 45, 44, 43, 50, 53, 50, 47, 45, 44, 43, 42, 47, 49, 47, 45, 44, 43,
+        42, 42, 45, 46, 45, 44, 43, 42, 42, 41,
+        /* Size 16 */
+        64, 69, 74, 73, 71, 66, 62, 58, 55, 52, 50, 48, 47, 46, 45, 45, 69, 70,
+        71, 71, 71, 67, 63, 60, 56, 54, 51, 49, 48, 47, 45, 45, 74, 71, 69, 70,
+        70, 68, 65, 62, 58, 55, 53, 51, 49, 48, 46, 46, 73, 71, 70, 67, 65, 63,
+        61, 58, 55, 53, 51, 50, 48, 47, 46, 46, 71, 71, 70, 65, 60, 58, 56, 54,
+        53, 51, 50, 48, 47, 46, 45, 45, 66, 67, 68, 63, 58, 56, 54, 52, 51, 50,
+        48, 47, 46, 45, 45, 45, 62, 63, 65, 61, 56, 54, 51, 50, 49, 48, 47, 46,
+        45, 45, 44, 44, 58, 60, 62, 58, 54, 52, 50, 49, 48, 47, 46, 45, 45, 44,
+        44, 44, 55, 56, 58, 55, 53, 51, 49, 48, 46, 46, 45, 44, 44, 43, 43, 43,
+        52, 54, 55, 53, 51, 50, 48, 47, 46, 45, 44, 44, 43, 43, 43, 43, 50, 51,
+        53, 51, 50, 48, 47, 46, 45, 44, 44, 43, 43, 43, 42, 42, 48, 49, 51, 50,
+        48, 47, 46, 45, 44, 44, 43, 43, 42, 42, 42, 42, 47, 48, 49, 48, 47, 46,
+        45, 45, 44, 43, 43, 42, 42, 42, 42, 42, 46, 47, 48, 47, 46, 45, 45, 44,
+        43, 43, 43, 42, 42, 42, 41, 41, 45, 45, 46, 46, 45, 45, 44, 44, 43, 43,
+        42, 42, 42, 41, 41, 41, 45, 45, 46, 46, 45, 45, 44, 44, 43, 43, 42, 42,
+        42, 41, 41, 41,
+        /* Size 32 */
+        64, 67, 69, 72, 74, 73, 73, 72, 71, 69, 66, 64, 62, 60, 58, 57, 55, 53,
+        52, 51, 50, 49, 48, 47, 47, 46, 46, 45, 45, 45, 45, 45, 67, 68, 70, 71,
+        73, 72, 72, 71, 71, 69, 67, 65, 63, 61, 59, 57, 56, 54, 53, 52, 51, 50,
+        49, 48, 47, 47, 46, 45, 45, 45, 45, 45, 69, 70, 70, 71, 71, 71, 71, 71,
+        71, 69, 67, 65, 63, 62, 60, 58, 56, 55, 54, 53, 51, 50, 49, 49, 48, 47,
+        47, 46, 45, 45, 45, 45, 72, 71, 71, 71, 70, 70, 70, 70, 71, 69, 67, 66,
+        64, 63, 61, 59, 57, 56, 55, 53, 52, 51, 50, 49, 48, 48, 47, 46, 46, 46,
+        46, 46, 74, 73, 71, 70, 69, 69, 70, 70, 70, 69, 68, 66, 65, 63, 62, 60,
+        58, 57, 55, 54, 53, 52, 51, 50, 49, 48, 48, 47, 46, 46, 46, 46, 73, 72,
+        71, 70, 69, 69, 69, 68, 68, 67, 65, 64, 63, 61, 60, 58, 57, 56, 54, 53,
+        52, 51, 50, 49, 48, 48, 47, 47, 46, 46, 46, 46, 73, 72, 71, 70, 70, 69,
+        67, 66, 65, 64, 63, 62, 61, 59, 58, 57, 55, 54, 53, 52, 51, 50, 50, 49,
+        48, 47, 47, 46, 46, 46, 46, 46, 72, 71, 71, 70, 70, 68, 66, 64, 62, 61,
+        60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 50, 49, 48, 48, 47, 47, 46,
+        45, 45, 45, 45, 71, 71, 71, 71, 70, 68, 65, 62, 60, 59, 58, 57, 56, 55,
+        54, 54, 53, 52, 51, 50, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45,
+        69, 69, 69, 69, 69, 67, 64, 61, 59, 58, 57, 56, 55, 54, 53, 53, 52, 51,
+        50, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45, 45, 66, 67, 67, 67,
+        68, 65, 63, 60, 58, 57, 56, 55, 54, 53, 52, 51, 51, 50, 50, 49, 48, 48,
+        47, 47, 46, 46, 45, 45, 45, 45, 45, 45, 64, 65, 65, 66, 66, 64, 62, 59,
+        57, 56, 55, 54, 52, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47, 46, 46, 45,
+        45, 45, 44, 44, 44, 44, 62, 63, 63, 64, 65, 63, 61, 58, 56, 55, 54, 52,
+        51, 51, 50, 49, 49, 48, 48, 47, 47, 46, 46, 46, 45, 45, 45, 44, 44, 44,
+        44, 44, 60, 61, 62, 63, 63, 61, 59, 57, 55, 54, 53, 52, 51, 50, 49, 49,
+        48, 48, 47, 47, 46, 46, 46, 45, 45, 45, 44, 44, 44, 44, 44, 44, 58, 59,
+        60, 61, 62, 60, 58, 56, 54, 53, 52, 51, 50, 49, 49, 48, 48, 47, 47, 46,
+        46, 46, 45, 45, 45, 44, 44, 44, 44, 44, 44, 44, 57, 57, 58, 59, 60, 58,
+        57, 55, 54, 53, 51, 50, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45,
+        44, 44, 44, 44, 43, 43, 43, 43, 55, 56, 56, 57, 58, 57, 55, 54, 53, 52,
+        51, 50, 49, 48, 48, 47, 46, 46, 46, 45, 45, 45, 44, 44, 44, 44, 43, 43,
+        43, 43, 43, 43, 53, 54, 55, 56, 57, 56, 54, 53, 52, 51, 50, 49, 48, 48,
+        47, 47, 46, 46, 45, 45, 45, 44, 44, 44, 44, 43, 43, 43, 43, 43, 43, 43,
+        52, 53, 54, 55, 55, 54, 53, 52, 51, 50, 50, 49, 48, 47, 47, 46, 46, 45,
+        45, 45, 44, 44, 44, 44, 43, 43, 43, 43, 43, 43, 43, 43, 51, 52, 53, 53,
+        54, 53, 52, 51, 50, 50, 49, 48, 47, 47, 46, 46, 45, 45, 45, 44, 44, 44,
+        44, 43, 43, 43, 43, 43, 42, 42, 42, 42, 50, 51, 51, 52, 53, 52, 51, 50,
+        50, 49, 48, 48, 47, 46, 46, 45, 45, 45, 44, 44, 44, 43, 43, 43, 43, 43,
+        43, 42, 42, 42, 42, 42, 49, 50, 50, 51, 52, 51, 50, 50, 49, 48, 48, 47,
+        46, 46, 46, 45, 45, 44, 44, 44, 43, 43, 43, 43, 43, 43, 42, 42, 42, 42,
+        42, 42, 48, 49, 49, 50, 51, 50, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45,
+        44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 42, 47, 48,
+        49, 49, 50, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 44, 44, 44, 43,
+        43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 42, 47, 47, 48, 48, 49, 48,
+        48, 48, 47, 47, 46, 46, 45, 45, 45, 44, 44, 44, 43, 43, 43, 43, 42, 42,
+        42, 42, 42, 42, 42, 42, 42, 42, 46, 47, 47, 48, 48, 48, 47, 47, 47, 46,
+        46, 45, 45, 45, 44, 44, 44, 43, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42,
+        42, 42, 42, 42, 46, 46, 47, 47, 48, 47, 47, 47, 46, 46, 45, 45, 45, 44,
+        44, 44, 43, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 41, 41, 41, 41,
+        45, 45, 46, 46, 47, 47, 46, 46, 46, 45, 45, 45, 44, 44, 44, 44, 43, 43,
+        43, 43, 42, 42, 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, 45, 45, 45, 46,
+        46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42,
+        42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 45, 45, 45, 46, 46, 46, 46, 45,
+        45, 45, 45, 44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42,
+        41, 41, 41, 41, 41, 41, 45, 45, 45, 46, 46, 46, 46, 45, 45, 45, 45, 44,
+        44, 44, 44, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 41, 41, 41, 41,
+        41, 41, 45, 45, 45, 46, 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 44, 43,
+        43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41 },
+      { /* Intra matrices */
+        /* Size 4 */
+        93, 88, 70, 60, 88, 73, 64, 58, 70, 64, 58, 55, 60, 58, 55, 53,
+        /* Size 8 */
+        84, 98, 94, 82, 71, 65, 60, 57, 98, 91, 93, 86, 76, 69, 63, 60, 94, 93,
+        79, 73, 69, 65, 61, 58, 82, 86, 73, 67, 63, 61, 58, 57, 71, 76, 69, 63,
+        60, 58, 56, 55, 65, 69, 65, 61, 58, 56, 55, 54, 60, 63, 61, 58, 56, 55,
+        54, 53, 57, 60, 58, 57, 55, 54, 53, 53,
+        /* Size 16 */
+        86, 93, 100, 98, 96, 89, 83, 78, 73, 69, 66, 63, 61, 60, 58, 58, 93, 94,
+        96, 96, 95, 90, 85, 80, 75, 71, 68, 65, 63, 61, 59, 59, 100, 96, 93, 94,
+        95, 91, 87, 82, 77, 74, 70, 67, 64, 62, 61, 61, 98, 96, 94, 90, 87, 84,
+        81, 77, 74, 71, 68, 65, 63, 61, 60, 60, 96, 95, 95, 87, 80, 77, 74, 72,
+        70, 68, 66, 64, 62, 61, 59, 59, 89, 90, 91, 84, 77, 74, 71, 69, 67, 65,
+        64, 62, 61, 59, 58, 58, 83, 85, 87, 81, 74, 71, 68, 66, 64, 63, 62, 60,
+        59, 58, 58, 58, 78, 80, 82, 77, 72, 69, 66, 64, 62, 61, 60, 59, 58, 58,
+        57, 57, 73, 75, 77, 74, 70, 67, 64, 62, 61, 60, 59, 58, 57, 57, 56, 56,
+        69, 71, 74, 71, 68, 65, 63, 61, 60, 59, 58, 57, 57, 56, 56, 56, 66, 68,
+        70, 68, 66, 64, 62, 60, 59, 58, 57, 56, 56, 55, 55, 55, 63, 65, 67, 65,
+        64, 62, 60, 59, 58, 57, 56, 56, 55, 55, 55, 55, 61, 63, 64, 63, 62, 61,
+        59, 58, 57, 57, 56, 55, 55, 55, 54, 54, 60, 61, 62, 61, 61, 59, 58, 58,
+        57, 56, 55, 55, 55, 54, 54, 54, 58, 59, 61, 60, 59, 58, 58, 57, 56, 56,
+        55, 55, 54, 54, 54, 54, 58, 59, 61, 60, 59, 58, 58, 57, 56, 56, 55, 55,
+        54, 54, 54, 54,
+        /* Size 32 */
+        86, 90, 93, 97, 101, 100, 98, 97, 96, 93, 90, 87, 83, 81, 78, 76, 73,
+        71, 70, 68, 66, 65, 64, 63, 62, 61, 60, 59, 59, 59, 59, 59, 90, 92, 94,
+        97, 99, 98, 97, 97, 96, 93, 90, 87, 84, 82, 79, 77, 74, 73, 71, 69, 67,
+        66, 65, 64, 62, 62, 61, 60, 59, 59, 59, 59, 93, 94, 95, 96, 97, 97, 96,
+        96, 96, 93, 91, 88, 86, 83, 81, 78, 76, 74, 72, 70, 68, 67, 66, 64, 63,
+        62, 62, 61, 60, 60, 60, 60, 97, 97, 96, 96, 95, 95, 95, 96, 96, 93, 91,
+        89, 87, 84, 82, 79, 77, 75, 73, 71, 69, 68, 67, 65, 64, 63, 62, 61, 60,
+        60, 60, 60, 101, 99, 97, 95, 93, 94, 94, 95, 95, 94, 92, 90, 88, 85, 83,
+        80, 78, 76, 74, 72, 70, 69, 68, 66, 65, 64, 63, 62, 61, 61, 61, 61, 100,
+        98, 97, 95, 94, 93, 93, 92, 92, 90, 88, 86, 85, 82, 80, 78, 76, 74, 73,
+        71, 69, 68, 67, 65, 64, 63, 62, 62, 61, 61, 61, 61, 98, 97, 96, 95, 94,
+        93, 91, 90, 88, 86, 85, 83, 81, 80, 78, 76, 74, 73, 71, 70, 68, 67, 66,
+        65, 64, 63, 62, 61, 60, 60, 60, 60, 97, 97, 96, 96, 95, 92, 90, 87, 84,
+        83, 81, 80, 78, 77, 75, 74, 72, 71, 70, 68, 67, 66, 65, 64, 63, 62, 61,
+        61, 60, 60, 60, 60, 96, 96, 96, 96, 95, 92, 88, 84, 80, 79, 78, 76, 75,
+        74, 73, 72, 70, 69, 68, 67, 66, 65, 64, 63, 62, 62, 61, 60, 60, 60, 60,
+        60, 93, 93, 93, 93, 94, 90, 86, 83, 79, 78, 76, 75, 73, 72, 71, 70, 69,
+        68, 67, 66, 65, 64, 63, 63, 62, 61, 60, 60, 59, 59, 59, 59, 90, 90, 91,
+        91, 92, 88, 85, 81, 78, 76, 75, 73, 72, 71, 70, 69, 67, 67, 66, 65, 64,
+        63, 63, 62, 61, 61, 60, 59, 59, 59, 59, 59, 87, 87, 88, 89, 90, 86, 83,
+        80, 76, 75, 73, 72, 70, 69, 68, 67, 66, 65, 65, 64, 63, 62, 62, 61, 60,
+        60, 59, 59, 58, 58, 58, 58, 83, 84, 86, 87, 88, 85, 81, 78, 75, 73, 72,
+        70, 68, 67, 66, 66, 65, 64, 63, 63, 62, 61, 61, 60, 60, 59, 59, 58, 58,
+        58, 58, 58, 81, 82, 83, 84, 85, 82, 80, 77, 74, 72, 71, 69, 67, 66, 66,
+        65, 64, 63, 63, 62, 61, 61, 60, 60, 59, 59, 58, 58, 58, 58, 58, 58, 78,
+        79, 81, 82, 83, 80, 78, 75, 73, 71, 70, 68, 66, 66, 65, 64, 63, 62, 62,
+        61, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 76, 77, 78, 79, 80,
+        78, 76, 74, 72, 70, 69, 67, 66, 65, 64, 63, 62, 62, 61, 60, 60, 59, 59,
+        59, 58, 58, 58, 57, 57, 57, 57, 57, 73, 74, 76, 77, 78, 76, 74, 72, 70,
+        69, 67, 66, 65, 64, 63, 62, 61, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57,
+        57, 57, 57, 57, 57, 71, 73, 74, 75, 76, 74, 73, 71, 69, 68, 67, 65, 64,
+        63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56,
+        56, 70, 71, 72, 73, 74, 73, 71, 70, 68, 67, 66, 65, 63, 63, 62, 61, 60,
+        60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 68, 69, 70,
+        71, 72, 71, 70, 68, 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 59, 58, 58,
+        58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 66, 67, 68, 69, 70, 69, 68,
+        67, 66, 65, 64, 63, 62, 61, 61, 60, 59, 59, 58, 58, 57, 57, 57, 57, 56,
+        56, 56, 56, 55, 55, 55, 55, 65, 66, 67, 68, 69, 68, 67, 66, 65, 64, 63,
+        62, 61, 61, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55,
+        55, 55, 55, 64, 65, 66, 67, 68, 67, 66, 65, 64, 63, 63, 62, 61, 60, 60,
+        59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 63,
+        64, 64, 65, 66, 65, 65, 64, 63, 63, 62, 61, 60, 60, 59, 59, 58, 58, 57,
+        57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 62, 62, 63, 64, 65,
+        64, 64, 63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56,
+        56, 55, 55, 55, 55, 55, 55, 55, 55, 61, 62, 62, 63, 64, 63, 63, 62, 62,
+        61, 61, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55,
+        55, 54, 54, 54, 54, 60, 61, 62, 62, 63, 62, 62, 61, 61, 60, 60, 59, 59,
+        58, 58, 58, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 54,
+        54, 59, 60, 61, 61, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57,
+        57, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 59, 59, 60,
+        60, 61, 61, 60, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 55,
+        55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 59, 59, 60, 60, 61, 61, 60,
+        60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55,
+        54, 54, 54, 54, 54, 54, 54, 59, 59, 60, 60, 61, 61, 60, 60, 60, 59, 59,
+        58, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 54,
+        54, 54, 54, 59, 59, 60, 60, 61, 61, 60, 60, 60, 59, 59, 58, 58, 58, 57,
+        57, 57, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 51, 50, 45, 51, 48, 47, 44, 50, 47, 43, 41, 45, 44, 41, 40,
+        /* Size 8 */
+        64, 72, 54, 52, 51, 49, 47, 45, 72, 59, 53, 55, 55, 53, 50, 48, 54, 53,
+        50, 51, 51, 50, 48, 47, 52, 55, 51, 48, 48, 47, 46, 45, 51, 55, 51, 48,
+        46, 45, 44, 43, 49, 53, 50, 47, 45, 44, 43, 42, 47, 50, 48, 46, 44, 43,
+        42, 41, 45, 48, 47, 45, 43, 42, 41, 41,
+        /* Size 16 */
+        64, 68, 72, 63, 54, 53, 52, 52, 51, 50, 49, 48, 47, 46, 45, 45, 68, 67,
+        66, 59, 53, 54, 54, 53, 53, 52, 51, 50, 48, 47, 46, 46, 72, 66, 59, 56,
+        53, 54, 55, 55, 55, 54, 53, 51, 50, 49, 48, 48, 63, 59, 56, 54, 52, 52,
+        53, 53, 53, 52, 51, 50, 49, 48, 47, 47, 54, 53, 53, 52, 50, 50, 51, 51,
+        51, 50, 50, 49, 48, 47, 47, 47, 53, 54, 54, 52, 50, 50, 49, 49, 49, 49,
+        48, 48, 47, 46, 46, 46, 52, 54, 55, 53, 51, 49, 48, 48, 48, 47, 47, 46,
+        46, 45, 45, 45, 52, 53, 55, 53, 51, 49, 48, 47, 47, 46, 46, 45, 45, 45,
+        44, 44, 51, 53, 55, 53, 51, 49, 48, 47, 46, 45, 45, 44, 44, 44, 43, 43,
+        50, 52, 54, 52, 50, 49, 47, 46, 45, 45, 44, 44, 43, 43, 43, 43, 49, 51,
+        53, 51, 50, 48, 47, 46, 45, 44, 44, 43, 43, 42, 42, 42, 48, 50, 51, 50,
+        49, 48, 46, 45, 44, 44, 43, 43, 42, 42, 42, 42, 47, 48, 50, 49, 48, 47,
+        46, 45, 44, 43, 43, 42, 42, 42, 41, 41, 46, 47, 49, 48, 47, 46, 45, 45,
+        44, 43, 42, 42, 42, 41, 41, 41, 45, 46, 48, 47, 47, 46, 45, 44, 43, 43,
+        42, 42, 41, 41, 41, 41, 45, 46, 48, 47, 47, 46, 45, 44, 43, 43, 42, 42,
+        41, 41, 41, 41,
+        /* Size 32 */
+        64, 66, 68, 70, 72, 67, 63, 58, 54, 53, 53, 53, 52, 52, 52, 51, 51, 50,
+        50, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45, 45, 66, 67, 67, 68,
+        69, 65, 61, 57, 53, 53, 53, 53, 53, 53, 52, 52, 52, 51, 51, 50, 50, 49,
+        49, 48, 48, 47, 47, 46, 46, 46, 46, 46, 68, 67, 67, 66, 66, 63, 59, 56,
+        53, 53, 54, 54, 54, 54, 53, 53, 53, 52, 52, 51, 51, 50, 50, 49, 48, 48,
+        47, 47, 46, 46, 46, 46, 70, 68, 66, 64, 62, 60, 58, 56, 53, 54, 54, 54,
+        55, 54, 54, 54, 54, 53, 53, 52, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47,
+        47, 47, 72, 69, 66, 62, 59, 58, 56, 55, 53, 54, 54, 55, 55, 55, 55, 55,
+        55, 54, 54, 53, 53, 52, 51, 51, 50, 50, 49, 48, 48, 48, 48, 48, 67, 65,
+        63, 60, 58, 56, 55, 54, 52, 53, 53, 54, 54, 54, 54, 54, 54, 53, 53, 52,
+        52, 51, 51, 50, 50, 49, 49, 48, 48, 48, 48, 48, 63, 61, 59, 58, 56, 55,
+        54, 53, 52, 52, 52, 53, 53, 53, 53, 53, 53, 52, 52, 52, 51, 51, 50, 50,
+        49, 49, 48, 48, 47, 47, 47, 47, 58, 57, 56, 56, 55, 54, 53, 52, 51, 51,
+        51, 52, 52, 52, 52, 52, 52, 51, 51, 51, 50, 50, 50, 49, 49, 48, 48, 47,
+        47, 47, 47, 47, 54, 53, 53, 53, 53, 52, 52, 51, 50, 50, 50, 50, 51, 51,
+        51, 51, 51, 50, 50, 50, 50, 49, 49, 49, 48, 48, 47, 47, 47, 47, 47, 47,
+        53, 53, 53, 54, 54, 53, 52, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+        49, 49, 49, 49, 48, 48, 48, 47, 47, 46, 46, 46, 46, 46, 53, 53, 54, 54,
+        54, 53, 52, 51, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 48, 48, 48,
+        48, 47, 47, 47, 46, 46, 46, 46, 46, 46, 53, 53, 54, 54, 55, 54, 53, 52,
+        50, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 48, 48, 47, 47, 47, 46, 46,
+        46, 46, 45, 45, 45, 45, 52, 53, 54, 55, 55, 54, 53, 52, 51, 50, 49, 49,
+        48, 48, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 45,
+        45, 45, 52, 53, 54, 54, 55, 54, 53, 52, 51, 50, 49, 49, 48, 48, 48, 47,
+        47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 44, 52, 52,
+        53, 54, 55, 54, 53, 52, 51, 50, 49, 49, 48, 48, 47, 47, 47, 46, 46, 46,
+        46, 46, 45, 45, 45, 45, 45, 44, 44, 44, 44, 44, 51, 52, 53, 54, 55, 54,
+        53, 52, 51, 50, 49, 48, 48, 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 45,
+        45, 44, 44, 44, 44, 44, 44, 44, 51, 52, 53, 54, 55, 54, 53, 52, 51, 50,
+        49, 48, 48, 47, 47, 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 44, 44, 44,
+        43, 43, 43, 43, 50, 51, 52, 53, 54, 53, 52, 51, 50, 50, 49, 48, 47, 47,
+        46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 44, 44, 43, 43, 43, 43, 43, 43,
+        50, 51, 52, 53, 54, 53, 52, 51, 50, 49, 49, 48, 47, 47, 46, 46, 45, 45,
+        45, 44, 44, 44, 44, 44, 43, 43, 43, 43, 43, 43, 43, 43, 49, 50, 51, 52,
+        53, 52, 52, 51, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 44, 44, 44, 44,
+        43, 43, 43, 43, 43, 43, 42, 42, 42, 42, 49, 50, 51, 52, 53, 52, 51, 50,
+        50, 49, 48, 48, 47, 46, 46, 45, 45, 45, 44, 44, 44, 43, 43, 43, 43, 43,
+        42, 42, 42, 42, 42, 42, 48, 49, 50, 51, 52, 51, 51, 50, 49, 49, 48, 47,
+        47, 46, 46, 45, 45, 44, 44, 44, 43, 43, 43, 43, 43, 42, 42, 42, 42, 42,
+        42, 42, 48, 49, 50, 50, 51, 51, 50, 50, 49, 48, 48, 47, 46, 46, 45, 45,
+        44, 44, 44, 43, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 42, 47, 48,
+        49, 50, 51, 50, 50, 49, 49, 48, 47, 47, 46, 46, 45, 45, 44, 44, 44, 43,
+        43, 43, 43, 42, 42, 42, 42, 42, 41, 41, 41, 41, 47, 48, 48, 49, 50, 50,
+        49, 49, 48, 48, 47, 46, 46, 45, 45, 45, 44, 44, 43, 43, 43, 43, 42, 42,
+        42, 42, 42, 41, 41, 41, 41, 41, 46, 47, 48, 49, 50, 49, 49, 48, 48, 47,
+        47, 46, 46, 45, 45, 44, 44, 44, 43, 43, 43, 42, 42, 42, 42, 42, 41, 41,
+        41, 41, 41, 41, 46, 47, 47, 48, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45,
+        45, 44, 44, 43, 43, 43, 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 41,
+        45, 46, 47, 48, 48, 48, 48, 47, 47, 46, 46, 46, 45, 45, 44, 44, 44, 43,
+        43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, 45, 46, 46, 47,
+        48, 48, 47, 47, 47, 46, 46, 45, 45, 44, 44, 44, 43, 43, 43, 42, 42, 42,
+        42, 41, 41, 41, 41, 41, 41, 41, 41, 41, 45, 46, 46, 47, 48, 48, 47, 47,
+        47, 46, 46, 45, 45, 44, 44, 44, 43, 43, 43, 42, 42, 42, 42, 41, 41, 41,
+        41, 41, 41, 41, 41, 41, 45, 46, 46, 47, 48, 48, 47, 47, 47, 46, 46, 45,
+        45, 44, 44, 44, 43, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 41,
+        41, 41, 45, 46, 46, 47, 48, 48, 47, 47, 47, 46, 46, 45, 45, 44, 44, 44,
+        43, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, 41 },
+      { /* Intra matrices */
+        /* Size 4 */
+        90, 72, 69, 63, 72, 66, 65, 62, 69, 65, 59, 57, 63, 62, 57, 54,
+        /* Size 8 */
+        87, 98, 72, 70, 68, 65, 62, 60, 98, 80, 71, 74, 74, 70, 67, 64, 72, 71,
+        67, 68, 68, 66, 64, 62, 70, 74, 68, 64, 63, 62, 61, 59, 68, 74, 68, 63,
+        61, 59, 58, 57, 65, 70, 66, 62, 59, 58, 57, 56, 62, 67, 64, 61, 58, 57,
+        55, 54, 60, 64, 62, 59, 57, 56, 54, 53,
+        /* Size 16 */
+        88, 93, 99, 86, 73, 72, 71, 70, 69, 67, 66, 64, 63, 62, 60, 60, 93, 92,
+        90, 81, 72, 73, 73, 72, 72, 70, 69, 67, 65, 64, 62, 62, 99, 90, 81, 76,
+        72, 74, 75, 75, 74, 73, 71, 69, 68, 66, 65, 65, 86, 81, 76, 73, 70, 71,
+        72, 72, 71, 70, 69, 68, 66, 65, 64, 64, 73, 72, 72, 70, 67, 68, 68, 68,
+        68, 68, 67, 66, 65, 64, 63, 63, 72, 73, 74, 71, 68, 67, 67, 66, 66, 66,
+        65, 64, 63, 62, 61, 61, 71, 73, 75, 72, 68, 67, 65, 65, 64, 63, 63, 62,
+        62, 61, 60, 60, 70, 72, 75, 72, 68, 66, 65, 64, 63, 62, 62, 61, 60, 60,
+        59, 59, 69, 72, 74, 71, 68, 66, 64, 63, 61, 61, 60, 60, 59, 58, 58, 58,
+        67, 70, 73, 70, 68, 66, 63, 62, 61, 60, 59, 59, 58, 58, 57, 57, 66, 69,
+        71, 69, 67, 65, 63, 62, 60, 59, 58, 58, 57, 57, 56, 56, 64, 67, 69, 68,
+        66, 64, 62, 61, 60, 59, 58, 57, 56, 56, 56, 56, 63, 65, 68, 66, 65, 63,
+        62, 60, 59, 58, 57, 56, 56, 55, 55, 55, 62, 64, 66, 65, 64, 62, 61, 60,
+        58, 58, 57, 56, 55, 55, 54, 54, 60, 62, 65, 64, 63, 61, 60, 59, 58, 57,
+        56, 56, 55, 54, 54, 54, 60, 62, 65, 64, 63, 61, 60, 59, 58, 57, 56, 56,
+        55, 54, 54, 54,
+        /* Size 32 */
+        88, 91, 94, 97, 100, 93, 86, 80, 73, 73, 72, 72, 71, 71, 70, 70, 69, 68,
+        68, 67, 66, 65, 65, 64, 63, 63, 62, 61, 61, 61, 61, 61, 91, 92, 93, 94,
+        95, 89, 84, 78, 73, 73, 73, 72, 72, 72, 71, 71, 71, 70, 69, 68, 68, 67,
+        66, 65, 64, 64, 63, 62, 62, 62, 62, 62, 94, 93, 92, 91, 90, 86, 82, 77,
+        73, 73, 73, 73, 73, 73, 73, 72, 72, 71, 70, 70, 69, 68, 67, 66, 66, 65,
+        64, 63, 63, 63, 63, 63, 97, 94, 91, 89, 86, 83, 79, 76, 73, 73, 74, 74,
+        75, 74, 74, 74, 73, 73, 72, 71, 70, 69, 69, 68, 67, 66, 65, 65, 64, 64,
+        64, 64, 100, 95, 90, 86, 81, 79, 77, 75, 72, 73, 74, 75, 76, 75, 75, 75,
+        75, 74, 73, 72, 72, 71, 70, 69, 68, 67, 66, 66, 65, 65, 65, 65, 93, 89,
+        86, 83, 79, 77, 75, 73, 71, 72, 73, 73, 74, 74, 74, 73, 73, 73, 72, 71,
+        71, 70, 69, 68, 67, 67, 66, 65, 64, 64, 64, 64, 86, 84, 82, 79, 77, 75,
+        73, 72, 70, 71, 71, 72, 72, 72, 72, 72, 72, 71, 71, 70, 70, 69, 68, 67,
+        67, 66, 65, 65, 64, 64, 64, 64, 80, 78, 77, 76, 75, 73, 72, 70, 69, 69,
+        70, 70, 70, 70, 70, 70, 70, 70, 69, 69, 68, 68, 67, 67, 66, 65, 65, 64,
+        63, 63, 63, 63, 73, 73, 73, 73, 72, 71, 70, 69, 68, 68, 68, 68, 69, 69,
+        69, 69, 69, 68, 68, 68, 67, 67, 66, 66, 65, 65, 64, 63, 63, 63, 63, 63,
+        73, 73, 73, 73, 73, 72, 71, 69, 68, 68, 68, 68, 68, 68, 68, 68, 68, 67,
+        67, 67, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 62, 62, 72, 73, 73, 74,
+        74, 73, 71, 70, 68, 68, 68, 67, 67, 67, 67, 67, 67, 66, 66, 66, 65, 65,
+        64, 64, 64, 63, 63, 62, 62, 62, 62, 62, 72, 72, 73, 74, 75, 73, 72, 70,
+        68, 68, 67, 67, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 63, 63, 62,
+        62, 62, 61, 61, 61, 61, 71, 72, 73, 75, 76, 74, 72, 70, 69, 68, 67, 66,
+        65, 65, 65, 65, 64, 64, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 60, 60,
+        60, 60, 71, 72, 73, 74, 75, 74, 72, 70, 69, 68, 67, 66, 65, 65, 64, 64,
+        64, 63, 63, 63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 60, 70, 71,
+        73, 74, 75, 74, 72, 70, 69, 68, 67, 66, 65, 64, 64, 64, 63, 63, 62, 62,
+        62, 62, 61, 61, 61, 60, 60, 60, 59, 59, 59, 59, 70, 71, 72, 74, 75, 73,
+        72, 70, 69, 68, 67, 66, 65, 64, 64, 63, 62, 62, 62, 61, 61, 61, 61, 60,
+        60, 60, 59, 59, 59, 59, 59, 59, 69, 71, 72, 73, 75, 73, 72, 70, 69, 68,
+        67, 65, 64, 64, 63, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59,
+        58, 58, 58, 58, 68, 70, 71, 73, 74, 73, 71, 70, 68, 67, 66, 65, 64, 63,
+        63, 62, 61, 61, 61, 60, 60, 60, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58,
+        68, 69, 70, 72, 73, 72, 71, 69, 68, 67, 66, 65, 64, 63, 62, 62, 61, 61,
+        60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 67, 68, 70, 71,
+        72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 61, 60, 60, 60, 59, 59,
+        58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 66, 68, 69, 70, 72, 71, 70, 68,
+        67, 66, 65, 64, 63, 63, 62, 61, 60, 60, 60, 59, 59, 58, 58, 58, 57, 57,
+        57, 57, 57, 57, 57, 57, 65, 67, 68, 69, 71, 70, 69, 68, 67, 66, 65, 64,
+        63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56,
+        56, 56, 65, 66, 67, 69, 70, 69, 68, 67, 66, 65, 64, 64, 63, 62, 61, 61,
+        60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 64, 65,
+        66, 68, 69, 68, 67, 67, 66, 65, 64, 63, 62, 62, 61, 60, 60, 59, 59, 58,
+        58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 63, 64, 66, 67, 68, 67,
+        67, 66, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 58, 57, 57, 57, 56,
+        56, 56, 56, 55, 55, 55, 55, 55, 63, 64, 65, 66, 67, 67, 66, 65, 65, 64,
+        63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55,
+        55, 55, 55, 55, 62, 63, 64, 65, 66, 66, 65, 65, 64, 63, 63, 62, 61, 61,
+        60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55,
+        61, 62, 63, 65, 66, 65, 65, 64, 63, 63, 62, 62, 61, 60, 60, 59, 59, 58,
+        58, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 61, 62, 63, 64,
+        65, 64, 64, 63, 63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 57, 56,
+        56, 56, 55, 55, 55, 55, 54, 54, 54, 54, 61, 62, 63, 64, 65, 64, 64, 63,
+        63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55,
+        55, 55, 54, 54, 54, 54, 61, 62, 63, 64, 65, 64, 64, 63, 63, 62, 62, 61,
+        60, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 54, 54,
+        54, 54, 61, 62, 63, 64, 65, 64, 64, 63, 63, 62, 62, 61, 60, 60, 59, 59,
+        58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 54, 54, 54, 54 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 62, 51, 46, 62, 53, 48, 45, 51, 48, 45, 43, 46, 45, 43, 42,
+        /* Size 8 */
+        64, 72, 70, 62, 56, 52, 50, 48, 72, 68, 69, 65, 59, 55, 52, 49, 70, 69,
+        61, 57, 55, 52, 50, 49, 62, 65, 57, 53, 51, 50, 49, 48, 56, 59, 55, 51,
+        49, 48, 47, 47, 52, 55, 52, 50, 48, 47, 47, 46, 50, 52, 50, 49, 47, 47,
+        46, 46, 48, 49, 49, 48, 47, 46, 46, 45,
+        /* Size 16 */
+        64, 68, 72, 71, 70, 66, 62, 59, 56, 54, 52, 51, 50, 49, 48, 48, 68, 69,
+        70, 70, 70, 67, 64, 61, 58, 56, 54, 52, 51, 50, 49, 49, 72, 70, 68, 69,
+        69, 67, 65, 62, 59, 57, 55, 53, 52, 50, 49, 49, 71, 70, 69, 67, 65, 63,
+        61, 59, 57, 55, 53, 52, 51, 50, 49, 49, 70, 70, 69, 65, 61, 59, 57, 56,
+        55, 53, 52, 51, 50, 49, 49, 49, 66, 67, 67, 63, 59, 57, 55, 54, 53, 52,
+        51, 50, 49, 49, 48, 48, 62, 64, 65, 61, 57, 55, 53, 52, 51, 51, 50, 49,
+        49, 48, 48, 48, 59, 61, 62, 59, 56, 54, 52, 51, 50, 50, 49, 49, 48, 48,
+        47, 47, 56, 58, 59, 57, 55, 53, 51, 50, 49, 49, 48, 48, 47, 47, 47, 47,
+        54, 56, 57, 55, 53, 52, 51, 50, 49, 48, 48, 47, 47, 47, 46, 46, 52, 54,
+        55, 53, 52, 51, 50, 49, 48, 48, 47, 47, 47, 46, 46, 46, 51, 52, 53, 52,
+        51, 50, 49, 49, 48, 47, 47, 47, 46, 46, 46, 46, 50, 51, 52, 51, 50, 49,
+        49, 48, 47, 47, 47, 46, 46, 46, 46, 46, 49, 50, 50, 50, 49, 49, 48, 48,
+        47, 47, 46, 46, 46, 46, 45, 45, 48, 49, 49, 49, 49, 48, 48, 47, 47, 46,
+        46, 46, 46, 45, 45, 45, 48, 49, 49, 49, 49, 48, 48, 47, 47, 46, 46, 46,
+        46, 45, 45, 45,
+        /* Size 32 */
+        64, 66, 68, 70, 72, 72, 71, 70, 70, 68, 66, 64, 62, 61, 59, 58, 56, 55,
+        54, 53, 52, 52, 51, 50, 50, 49, 49, 48, 48, 48, 48, 48, 66, 67, 69, 70,
+        71, 71, 70, 70, 70, 68, 66, 65, 63, 61, 60, 59, 57, 56, 55, 54, 53, 52,
+        52, 51, 50, 50, 49, 49, 48, 48, 48, 48, 68, 69, 69, 70, 70, 70, 70, 70,
+        70, 68, 67, 65, 64, 62, 61, 59, 58, 57, 56, 55, 54, 53, 52, 51, 51, 50,
+        50, 49, 49, 49, 49, 49, 70, 70, 70, 69, 69, 69, 69, 69, 69, 68, 67, 66,
+        64, 63, 61, 60, 58, 57, 56, 55, 54, 53, 53, 52, 51, 51, 50, 49, 49, 49,
+        49, 49, 72, 71, 70, 69, 68, 68, 69, 69, 69, 68, 67, 66, 65, 63, 62, 61,
+        59, 58, 57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49, 49, 49, 49, 72, 71,
+        70, 69, 68, 68, 68, 67, 67, 66, 65, 64, 63, 62, 61, 59, 58, 57, 56, 55,
+        54, 53, 53, 52, 51, 51, 50, 50, 49, 49, 49, 49, 71, 70, 70, 69, 69, 68,
+        67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 53, 52, 51,
+        51, 50, 50, 49, 49, 49, 49, 49, 70, 70, 70, 69, 69, 67, 66, 64, 63, 62,
+        61, 60, 59, 58, 58, 57, 56, 55, 54, 54, 53, 52, 52, 51, 50, 50, 50, 49,
+        49, 49, 49, 49, 70, 70, 70, 69, 69, 67, 65, 63, 61, 60, 59, 58, 57, 57,
+        56, 55, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 49, 49,
+        68, 68, 68, 68, 68, 66, 64, 62, 60, 59, 58, 57, 56, 56, 55, 55, 54, 53,
+        53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 48, 48, 48, 48, 66, 66, 67, 67,
+        67, 65, 63, 61, 59, 58, 57, 56, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51,
+        50, 50, 49, 49, 49, 48, 48, 48, 48, 48, 64, 65, 65, 66, 66, 64, 62, 60,
+        58, 57, 56, 55, 54, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 49, 49, 49,
+        48, 48, 48, 48, 48, 48, 62, 63, 64, 64, 65, 63, 61, 59, 57, 56, 55, 54,
+        53, 53, 52, 52, 51, 51, 51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 48, 48,
+        48, 48, 61, 61, 62, 63, 63, 62, 60, 58, 57, 56, 55, 54, 53, 52, 52, 51,
+        51, 51, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 59, 60,
+        61, 61, 62, 61, 59, 58, 56, 55, 54, 53, 52, 52, 51, 51, 50, 50, 50, 49,
+        49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 58, 59, 59, 60, 61, 59,
+        58, 57, 55, 55, 54, 53, 52, 51, 51, 50, 50, 50, 49, 49, 49, 48, 48, 48,
+        48, 48, 47, 47, 47, 47, 47, 47, 56, 57, 58, 58, 59, 58, 57, 56, 55, 54,
+        53, 52, 51, 51, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47,
+        47, 47, 47, 47, 55, 56, 57, 57, 58, 57, 56, 55, 54, 53, 53, 52, 51, 51,
+        50, 50, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+        54, 55, 56, 56, 57, 56, 55, 54, 53, 53, 52, 51, 51, 50, 50, 49, 49, 49,
+        48, 48, 48, 48, 47, 47, 47, 47, 47, 47, 46, 46, 46, 46, 53, 54, 55, 55,
+        56, 55, 54, 54, 53, 52, 52, 51, 50, 50, 49, 49, 49, 48, 48, 48, 48, 47,
+        47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 52, 53, 54, 54, 55, 54, 53, 53,
+        52, 52, 51, 50, 50, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 46,
+        46, 46, 46, 46, 46, 46, 52, 52, 53, 53, 54, 53, 53, 52, 52, 51, 51, 50,
+        50, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46,
+        46, 46, 51, 52, 52, 53, 53, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 48,
+        48, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 50, 51,
+        51, 52, 52, 52, 51, 51, 51, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47, 47,
+        47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 50, 50, 51, 51, 52, 51,
+        51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46,
+        46, 46, 46, 46, 46, 46, 46, 46, 49, 50, 50, 51, 51, 51, 50, 50, 50, 49,
+        49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46,
+        46, 46, 46, 46, 49, 49, 50, 50, 50, 50, 50, 50, 49, 49, 49, 48, 48, 48,
+        48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45,
+        48, 49, 49, 49, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47,
+        47, 46, 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 48, 48, 49, 49,
+        49, 49, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46,
+        46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 48, 48, 49, 49, 49, 49, 49, 49,
+        49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46,
+        45, 45, 45, 45, 45, 45, 48, 48, 49, 49, 49, 49, 49, 49, 49, 48, 48, 48,
+        48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45,
+        45, 45, 48, 48, 49, 49, 49, 49, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47,
+        47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45 },
+      { /* Intra matrices */
+        /* Size 4 */
+        86, 83, 68, 60, 83, 70, 64, 59, 68, 64, 59, 57, 60, 59, 57, 55,
+        /* Size 8 */
+        79, 90, 87, 77, 69, 64, 61, 58, 90, 85, 86, 80, 73, 67, 63, 60, 87, 86,
+        75, 71, 67, 64, 61, 59, 77, 80, 71, 66, 63, 61, 59, 58, 69, 73, 67, 63,
+        60, 59, 58, 57, 64, 67, 64, 61, 59, 57, 57, 56, 61, 63, 61, 59, 58, 57,
+        56, 55, 58, 60, 59, 58, 57, 56, 55, 55,
+        /* Size 16 */
+        80, 86, 91, 90, 88, 83, 78, 74, 70, 68, 65, 63, 61, 60, 59, 59, 86, 87,
+        89, 88, 88, 84, 80, 76, 72, 69, 66, 65, 63, 61, 60, 60, 91, 89, 86, 87,
+        87, 84, 82, 78, 74, 71, 68, 66, 64, 62, 61, 61, 90, 88, 87, 84, 82, 79,
+        77, 74, 71, 69, 66, 65, 63, 62, 60, 60, 88, 88, 87, 82, 76, 74, 72, 70,
+        68, 66, 65, 63, 62, 61, 60, 60, 83, 84, 84, 79, 74, 71, 69, 68, 66, 65,
+        63, 62, 61, 60, 59, 59, 78, 80, 82, 77, 72, 69, 66, 65, 64, 63, 62, 61,
+        60, 59, 59, 59, 74, 76, 78, 74, 70, 68, 65, 64, 62, 62, 61, 60, 59, 59,
+        58, 58, 70, 72, 74, 71, 68, 66, 64, 62, 61, 60, 60, 59, 58, 58, 57, 57,
+        68, 69, 71, 69, 66, 65, 63, 62, 60, 60, 59, 58, 58, 57, 57, 57, 65, 66,
+        68, 66, 65, 63, 62, 61, 60, 59, 58, 58, 57, 57, 57, 57, 63, 65, 66, 65,
+        63, 62, 61, 60, 59, 58, 58, 57, 57, 57, 56, 56, 61, 63, 64, 63, 62, 61,
+        60, 59, 58, 58, 57, 57, 57, 56, 56, 56, 60, 61, 62, 62, 61, 60, 59, 59,
+        58, 57, 57, 57, 56, 56, 56, 56, 59, 60, 61, 60, 60, 59, 59, 58, 57, 57,
+        57, 56, 56, 56, 56, 56, 59, 60, 61, 60, 60, 59, 59, 58, 57, 57, 57, 56,
+        56, 56, 56, 56,
+        /* Size 32 */
+        81, 84, 86, 89, 92, 91, 90, 89, 89, 86, 84, 81, 79, 77, 75, 73, 71, 69,
+        68, 67, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 59, 59, 84, 85, 87, 89,
+        91, 90, 89, 89, 88, 86, 84, 82, 79, 78, 76, 74, 72, 70, 69, 67, 66, 65,
+        64, 63, 62, 62, 61, 61, 60, 60, 60, 60, 86, 87, 88, 88, 89, 89, 89, 88,
+        88, 86, 84, 82, 80, 78, 76, 75, 73, 71, 70, 68, 67, 66, 65, 64, 63, 62,
+        62, 61, 60, 60, 60, 60, 89, 89, 88, 88, 88, 88, 88, 88, 88, 86, 85, 83,
+        81, 79, 77, 75, 74, 72, 71, 69, 68, 67, 66, 65, 64, 63, 62, 62, 61, 61,
+        61, 61, 92, 91, 89, 88, 86, 87, 87, 88, 88, 86, 85, 83, 82, 80, 78, 76,
+        74, 73, 71, 70, 68, 67, 66, 65, 64, 64, 63, 62, 61, 61, 61, 61, 91, 90,
+        89, 88, 87, 86, 86, 85, 85, 84, 82, 81, 80, 78, 76, 75, 73, 72, 70, 69,
+        68, 67, 66, 65, 64, 63, 62, 62, 61, 61, 61, 61, 90, 89, 89, 88, 87, 86,
+        85, 83, 82, 81, 80, 78, 77, 76, 74, 73, 72, 70, 69, 68, 67, 66, 65, 64,
+        63, 63, 62, 61, 61, 61, 61, 61, 89, 89, 88, 88, 88, 85, 83, 81, 79, 78,
+        77, 76, 75, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 64, 63, 62, 62, 61,
+        61, 61, 61, 61, 89, 88, 88, 88, 88, 85, 82, 79, 76, 75, 74, 73, 72, 71,
+        70, 69, 69, 68, 67, 66, 65, 64, 64, 63, 62, 62, 61, 61, 60, 60, 60, 60,
+        86, 86, 86, 86, 86, 84, 81, 78, 75, 74, 73, 72, 71, 70, 69, 68, 67, 67,
+        66, 65, 64, 64, 63, 63, 62, 61, 61, 60, 60, 60, 60, 60, 84, 84, 84, 85,
+        85, 82, 80, 77, 74, 73, 72, 71, 70, 69, 68, 67, 66, 66, 65, 64, 64, 63,
+        63, 62, 61, 61, 61, 60, 60, 60, 60, 60, 81, 82, 82, 83, 83, 81, 78, 76,
+        73, 72, 71, 69, 68, 67, 67, 66, 65, 65, 64, 63, 63, 62, 62, 61, 61, 60,
+        60, 60, 59, 59, 59, 59, 79, 79, 80, 81, 82, 80, 77, 75, 72, 71, 70, 68,
+        67, 66, 65, 65, 64, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 59,
+        59, 59, 77, 78, 78, 79, 80, 78, 76, 73, 71, 70, 69, 67, 66, 66, 65, 64,
+        63, 63, 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 59, 59, 59, 59, 75, 76,
+        76, 77, 78, 76, 74, 72, 70, 69, 68, 67, 65, 65, 64, 63, 63, 62, 62, 61,
+        61, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 73, 74, 75, 75, 76, 75,
+        73, 71, 69, 68, 67, 66, 65, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 60,
+        59, 59, 59, 58, 58, 58, 58, 58, 71, 72, 73, 74, 74, 73, 72, 70, 69, 67,
+        66, 65, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 59, 59, 58, 58,
+        58, 58, 58, 58, 69, 70, 71, 72, 73, 72, 70, 69, 68, 67, 66, 65, 64, 63,
+        62, 62, 61, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58,
+        68, 69, 70, 71, 71, 70, 69, 68, 67, 66, 65, 64, 63, 63, 62, 61, 61, 60,
+        60, 60, 59, 59, 59, 58, 58, 58, 58, 58, 57, 57, 57, 57, 67, 67, 68, 69,
+        70, 69, 68, 67, 66, 65, 64, 63, 63, 62, 61, 61, 60, 60, 60, 59, 59, 59,
+        58, 58, 58, 58, 58, 57, 57, 57, 57, 57, 65, 66, 67, 68, 68, 68, 67, 66,
+        65, 64, 64, 63, 62, 62, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57,
+        57, 57, 57, 57, 57, 57, 64, 65, 66, 67, 67, 67, 66, 65, 64, 64, 63, 62,
+        62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57,
+        57, 57, 64, 64, 65, 66, 66, 66, 65, 64, 64, 63, 63, 62, 61, 61, 60, 60,
+        59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 57, 57, 57, 63, 63,
+        64, 65, 65, 65, 64, 64, 63, 63, 62, 61, 61, 60, 60, 60, 59, 59, 58, 58,
+        58, 58, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 62, 62, 63, 64, 64, 64,
+        63, 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 58, 58, 58, 58, 57, 57, 57,
+        57, 57, 57, 56, 56, 56, 56, 56, 61, 62, 62, 63, 64, 63, 63, 62, 62, 61,
+        61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 57, 57, 57, 57, 57, 57, 56, 56,
+        56, 56, 56, 56, 61, 61, 62, 62, 63, 62, 62, 62, 61, 61, 61, 60, 60, 59,
+        59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56,
+        60, 61, 61, 62, 62, 62, 61, 61, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58,
+        58, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 59, 60, 60, 61,
+        61, 61, 61, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57,
+        57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 59, 60, 60, 61, 61, 61, 61, 61,
+        60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 56, 56,
+        56, 56, 56, 56, 56, 56, 59, 60, 60, 61, 61, 61, 61, 61, 60, 60, 60, 59,
+        59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56,
+        56, 56, 59, 60, 60, 61, 61, 61, 61, 61, 60, 60, 60, 59, 59, 59, 58, 58,
+        58, 58, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 53, 52, 48, 53, 50, 49, 48, 52, 49, 46, 45, 48, 48, 45, 43,
+        /* Size 8 */
+        64, 71, 55, 54, 53, 51, 50, 48, 71, 60, 55, 57, 56, 54, 52, 51, 55, 55,
+        52, 53, 53, 52, 51, 49, 54, 57, 53, 51, 50, 50, 49, 48, 53, 56, 53, 50,
+        49, 48, 47, 47, 51, 54, 52, 50, 48, 47, 46, 46, 50, 52, 51, 49, 47, 46,
+        46, 45, 48, 51, 49, 48, 47, 46, 45, 44,
+        /* Size 16 */
+        64, 67, 71, 63, 55, 55, 54, 54, 53, 52, 51, 50, 50, 49, 48, 48, 67, 66,
+        65, 60, 55, 55, 56, 55, 55, 54, 53, 52, 51, 50, 49, 49, 71, 65, 60, 58,
+        55, 56, 57, 57, 56, 55, 54, 53, 52, 51, 51, 51, 63, 60, 58, 56, 54, 54,
+        55, 55, 55, 54, 53, 52, 52, 51, 50, 50, 55, 55, 55, 54, 52, 53, 53, 53,
+        53, 52, 52, 51, 51, 50, 49, 49, 55, 55, 56, 54, 53, 52, 52, 52, 52, 51,
+        51, 50, 50, 49, 49, 49, 54, 56, 57, 55, 53, 52, 51, 51, 50, 50, 50, 49,
+        49, 48, 48, 48, 54, 55, 57, 55, 53, 52, 51, 50, 50, 49, 49, 48, 48, 48,
+        47, 47, 53, 55, 56, 55, 53, 52, 50, 50, 49, 48, 48, 48, 47, 47, 47, 47,
+        52, 54, 55, 54, 52, 51, 50, 49, 48, 48, 47, 47, 47, 47, 46, 46, 51, 53,
+        54, 53, 52, 51, 50, 49, 48, 47, 47, 47, 46, 46, 46, 46, 50, 52, 53, 52,
+        51, 50, 49, 48, 48, 47, 47, 46, 46, 46, 45, 45, 50, 51, 52, 52, 51, 50,
+        49, 48, 47, 47, 46, 46, 46, 45, 45, 45, 49, 50, 51, 51, 50, 49, 48, 48,
+        47, 47, 46, 46, 45, 45, 45, 45, 48, 49, 51, 50, 49, 49, 48, 47, 47, 46,
+        46, 45, 45, 45, 44, 44, 48, 49, 51, 50, 49, 49, 48, 47, 47, 46, 46, 45,
+        45, 45, 44, 44,
+        /* Size 32 */
+        64, 66, 67, 69, 71, 67, 63, 59, 55, 55, 55, 54, 54, 54, 54, 53, 53, 53,
+        52, 52, 51, 51, 50, 50, 50, 49, 49, 48, 48, 48, 48, 48, 66, 66, 67, 67,
+        68, 65, 62, 58, 55, 55, 55, 55, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52,
+        51, 51, 50, 50, 50, 49, 49, 49, 49, 49, 67, 67, 66, 66, 65, 63, 60, 58,
+        55, 55, 55, 55, 56, 55, 55, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 51,
+        50, 50, 49, 49, 49, 49, 69, 67, 66, 64, 63, 61, 59, 57, 55, 55, 56, 56,
+        56, 56, 56, 56, 55, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50,
+        50, 50, 71, 68, 65, 63, 60, 59, 58, 56, 55, 55, 56, 56, 57, 57, 57, 56,
+        56, 56, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 51, 51, 51, 51, 67, 65,
+        63, 61, 59, 58, 57, 55, 54, 55, 55, 55, 56, 56, 56, 56, 55, 55, 55, 54,
+        54, 53, 53, 52, 52, 52, 51, 51, 50, 50, 50, 50, 63, 62, 60, 59, 58, 57,
+        56, 55, 54, 54, 54, 54, 55, 55, 55, 55, 55, 54, 54, 54, 53, 53, 52, 52,
+        52, 51, 51, 50, 50, 50, 50, 50, 59, 58, 58, 57, 56, 55, 55, 54, 53, 53,
+        53, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 52, 52, 52, 51, 51, 50, 50,
+        50, 50, 50, 50, 55, 55, 55, 55, 55, 54, 54, 53, 52, 52, 53, 53, 53, 53,
+        53, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51, 50, 50, 50, 49, 49, 49, 49,
+        55, 55, 55, 55, 55, 55, 54, 53, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
+        52, 52, 51, 51, 51, 51, 50, 50, 50, 49, 49, 49, 49, 49, 55, 55, 55, 56,
+        56, 55, 54, 53, 53, 52, 52, 52, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51,
+        50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 54, 55, 55, 56, 56, 55, 54, 54,
+        53, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 49, 49,
+        49, 49, 48, 48, 48, 48, 54, 55, 56, 56, 57, 56, 55, 54, 53, 52, 52, 51,
+        51, 51, 51, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 48, 48, 48, 48,
+        48, 48, 54, 55, 55, 56, 57, 56, 55, 54, 53, 52, 52, 51, 51, 51, 50, 50,
+        50, 50, 50, 49, 49, 49, 49, 49, 49, 48, 48, 48, 48, 48, 48, 48, 54, 54,
+        55, 56, 57, 56, 55, 54, 53, 52, 52, 51, 51, 50, 50, 50, 50, 49, 49, 49,
+        49, 49, 48, 48, 48, 48, 48, 48, 47, 47, 47, 47, 53, 54, 55, 56, 56, 56,
+        55, 54, 53, 52, 52, 51, 50, 50, 50, 49, 49, 49, 49, 49, 48, 48, 48, 48,
+        48, 48, 47, 47, 47, 47, 47, 47, 53, 54, 55, 55, 56, 55, 55, 54, 53, 52,
+        52, 51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 48, 48, 48, 47, 47, 47, 47,
+        47, 47, 47, 47, 53, 53, 54, 55, 56, 55, 54, 53, 53, 52, 51, 51, 50, 50,
+        49, 49, 49, 48, 48, 48, 48, 48, 47, 47, 47, 47, 47, 47, 46, 46, 46, 46,
+        52, 53, 54, 55, 55, 55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 48, 48,
+        48, 48, 47, 47, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 52, 53, 53, 54,
+        55, 54, 54, 53, 52, 52, 51, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47, 47,
+        47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 51, 52, 53, 54, 54, 54, 53, 53,
+        52, 51, 51, 50, 50, 49, 49, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46,
+        46, 46, 46, 46, 46, 46, 51, 52, 52, 53, 54, 53, 53, 52, 52, 51, 51, 50,
+        49, 49, 49, 48, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46,
+        46, 46, 50, 51, 52, 53, 53, 53, 52, 52, 51, 51, 50, 50, 49, 49, 48, 48,
+        48, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 50, 51,
+        51, 52, 53, 52, 52, 52, 51, 51, 50, 50, 49, 49, 48, 48, 48, 47, 47, 47,
+        46, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 50, 50, 51, 52, 52, 52,
+        52, 51, 51, 50, 50, 49, 49, 49, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46,
+        46, 45, 45, 45, 45, 45, 45, 45, 49, 50, 51, 51, 52, 52, 51, 51, 50, 50,
+        50, 49, 49, 48, 48, 48, 47, 47, 47, 46, 46, 46, 46, 46, 45, 45, 45, 45,
+        45, 45, 45, 45, 49, 50, 50, 51, 51, 51, 51, 50, 50, 50, 49, 49, 48, 48,
+        48, 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+        48, 49, 50, 50, 51, 51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47, 47,
+        46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 48, 49, 49, 50,
+        51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47, 47, 46, 46, 46, 46, 46,
+        45, 45, 45, 45, 45, 45, 44, 44, 44, 44, 48, 49, 49, 50, 51, 50, 50, 50,
+        49, 49, 49, 48, 48, 48, 47, 47, 47, 46, 46, 46, 46, 46, 45, 45, 45, 45,
+        45, 45, 44, 44, 44, 44, 48, 49, 49, 50, 51, 50, 50, 50, 49, 49, 49, 48,
+        48, 48, 47, 47, 47, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 44, 44,
+        44, 44, 48, 49, 49, 50, 51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47,
+        47, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 44, 44, 44, 44 },
+      { /* Intra matrices */
+        /* Size 4 */
+        85, 70, 68, 63, 70, 66, 64, 62, 68, 64, 60, 58, 63, 62, 58, 56,
+        /* Size 8 */
+        82, 91, 70, 69, 67, 65, 62, 60, 91, 76, 70, 72, 71, 69, 66, 64, 70, 70,
+        66, 67, 67, 66, 64, 62, 69, 72, 67, 64, 63, 62, 61, 60, 67, 71, 67, 63,
+        61, 60, 59, 59, 65, 69, 66, 62, 60, 59, 58, 57, 62, 66, 64, 61, 59, 58,
+        57, 56, 60, 64, 62, 60, 59, 57, 56, 55,
+        /* Size 16 */
+        82, 87, 91, 81, 71, 70, 69, 68, 68, 66, 65, 64, 63, 62, 61, 61, 87, 86,
+        84, 77, 70, 71, 71, 70, 70, 69, 67, 66, 65, 64, 63, 63, 91, 84, 77, 74,
+        70, 71, 73, 72, 72, 71, 70, 68, 67, 65, 64, 64, 81, 77, 74, 71, 68, 69,
+        70, 70, 70, 69, 68, 67, 66, 65, 63, 63, 71, 70, 70, 68, 67, 67, 67, 67,
+        67, 67, 66, 65, 65, 64, 63, 63, 70, 71, 71, 69, 67, 66, 66, 66, 66, 65,
+        65, 64, 63, 62, 62, 62, 69, 71, 73, 70, 67, 66, 65, 64, 64, 63, 63, 62,
+        62, 61, 61, 61, 68, 70, 72, 70, 67, 66, 64, 64, 63, 62, 62, 61, 61, 60,
+        60, 60, 68, 70, 72, 70, 67, 66, 64, 63, 62, 61, 61, 60, 60, 59, 59, 59,
+        66, 69, 71, 69, 67, 65, 63, 62, 61, 61, 60, 60, 59, 59, 58, 58, 65, 67,
+        70, 68, 66, 65, 63, 62, 61, 60, 59, 59, 58, 58, 58, 58, 64, 66, 68, 67,
+        65, 64, 62, 61, 60, 60, 59, 58, 58, 58, 57, 57, 63, 65, 67, 66, 65, 63,
+        62, 61, 60, 59, 58, 58, 57, 57, 57, 57, 62, 64, 65, 65, 64, 62, 61, 60,
+        59, 59, 58, 58, 57, 57, 56, 56, 61, 63, 64, 63, 63, 62, 61, 60, 59, 58,
+        58, 57, 57, 56, 56, 56, 61, 63, 64, 63, 63, 62, 61, 60, 59, 58, 58, 57,
+        57, 56, 56, 56,
+        /* Size 32 */
+        83, 85, 87, 90, 92, 87, 81, 76, 71, 71, 70, 70, 69, 69, 69, 68, 68, 67,
+        67, 66, 66, 65, 64, 64, 63, 63, 62, 62, 61, 61, 61, 61, 85, 86, 87, 87,
+        88, 84, 79, 75, 71, 71, 71, 70, 70, 70, 70, 69, 69, 68, 68, 67, 67, 66,
+        65, 65, 64, 64, 63, 63, 62, 62, 62, 62, 87, 87, 86, 85, 85, 81, 78, 74,
+        71, 71, 71, 71, 71, 71, 71, 70, 70, 69, 69, 68, 68, 67, 66, 66, 65, 65,
+        64, 63, 63, 63, 63, 63, 90, 87, 85, 83, 81, 78, 76, 73, 71, 71, 71, 72,
+        72, 72, 72, 71, 71, 71, 70, 69, 69, 68, 67, 67, 66, 66, 65, 64, 64, 64,
+        64, 64, 92, 88, 85, 81, 77, 76, 74, 72, 70, 71, 72, 72, 73, 73, 73, 72,
+        72, 72, 71, 70, 70, 69, 68, 68, 67, 66, 66, 65, 65, 65, 65, 65, 87, 84,
+        81, 78, 76, 74, 73, 71, 70, 70, 71, 71, 72, 72, 71, 71, 71, 71, 70, 70,
+        69, 68, 68, 67, 67, 66, 65, 65, 64, 64, 64, 64, 81, 79, 78, 76, 74, 73,
+        71, 70, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 69, 69, 68, 68, 67, 66,
+        66, 65, 65, 64, 64, 64, 64, 64, 76, 75, 74, 73, 72, 71, 70, 69, 68, 68,
+        68, 69, 69, 69, 69, 69, 69, 68, 68, 68, 67, 67, 66, 66, 65, 65, 64, 64,
+        63, 63, 63, 63, 71, 71, 71, 71, 70, 70, 69, 68, 67, 67, 67, 67, 68, 68,
+        68, 68, 68, 67, 67, 67, 67, 66, 66, 65, 65, 64, 64, 63, 63, 63, 63, 63,
+        71, 71, 71, 71, 71, 70, 69, 68, 67, 67, 67, 67, 67, 67, 67, 67, 67, 66,
+        66, 66, 66, 65, 65, 65, 64, 64, 63, 63, 62, 62, 62, 62, 70, 71, 71, 71,
+        72, 71, 69, 68, 67, 67, 67, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65,
+        64, 64, 64, 63, 63, 62, 62, 62, 62, 62, 70, 70, 71, 72, 72, 71, 70, 69,
+        67, 67, 66, 66, 66, 65, 65, 65, 65, 65, 65, 64, 64, 64, 63, 63, 63, 63,
+        62, 62, 62, 62, 62, 62, 69, 70, 71, 72, 73, 72, 70, 69, 68, 67, 66, 66,
+        65, 65, 65, 64, 64, 64, 64, 63, 63, 63, 63, 63, 62, 62, 62, 61, 61, 61,
+        61, 61, 69, 70, 71, 72, 73, 72, 70, 69, 68, 67, 66, 65, 65, 64, 64, 64,
+        64, 63, 63, 63, 63, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 69, 70,
+        71, 72, 73, 71, 70, 69, 68, 67, 66, 65, 65, 64, 64, 63, 63, 63, 63, 62,
+        62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 68, 69, 70, 71, 72, 71,
+        70, 69, 68, 67, 66, 65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 61, 61, 61,
+        61, 60, 60, 60, 60, 60, 60, 60, 68, 69, 70, 71, 72, 71, 70, 69, 68, 67,
+        66, 65, 64, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+        59, 59, 59, 59, 67, 68, 69, 71, 72, 71, 70, 68, 67, 66, 66, 65, 64, 63,
+        63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+        67, 68, 69, 70, 71, 70, 69, 68, 67, 66, 65, 65, 64, 63, 63, 62, 62, 61,
+        61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 66, 67, 68, 69,
+        70, 70, 69, 68, 67, 66, 65, 64, 63, 63, 62, 62, 61, 61, 61, 60, 60, 60,
+        59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 66, 67, 68, 69, 70, 69, 68, 67,
+        67, 66, 65, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 59, 59, 58,
+        58, 58, 58, 58, 58, 58, 65, 66, 67, 68, 69, 68, 68, 67, 66, 65, 65, 64,
+        63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58,
+        58, 58, 64, 65, 66, 67, 68, 68, 67, 66, 66, 65, 64, 63, 63, 62, 62, 61,
+        61, 60, 60, 59, 59, 59, 59, 58, 58, 58, 58, 58, 57, 57, 57, 57, 64, 65,
+        66, 67, 68, 67, 66, 66, 65, 65, 64, 63, 63, 62, 61, 61, 60, 60, 60, 59,
+        59, 59, 58, 58, 58, 58, 58, 57, 57, 57, 57, 57, 63, 64, 65, 66, 67, 67,
+        66, 65, 65, 64, 64, 63, 62, 62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58,
+        58, 57, 57, 57, 57, 57, 57, 57, 63, 64, 65, 66, 66, 66, 65, 65, 64, 64,
+        63, 63, 62, 61, 61, 60, 60, 60, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57,
+        57, 57, 57, 57, 62, 63, 64, 65, 66, 65, 65, 64, 64, 63, 63, 62, 62, 61,
+        61, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 57, 57,
+        62, 63, 63, 64, 65, 65, 64, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 59,
+        59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 56, 56, 56, 56, 61, 62, 63, 64,
+        65, 64, 64, 63, 63, 62, 62, 62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58,
+        57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 61, 62, 63, 64, 65, 64, 64, 63,
+        63, 62, 62, 62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58, 57, 57, 57, 57,
+        57, 56, 56, 56, 56, 56, 61, 62, 63, 64, 65, 64, 64, 63, 63, 62, 62, 62,
+        61, 61, 60, 60, 59, 59, 59, 58, 58, 58, 57, 57, 57, 57, 57, 56, 56, 56,
+        56, 56, 61, 62, 63, 64, 65, 64, 64, 63, 63, 62, 62, 62, 61, 61, 60, 60,
+        59, 59, 59, 58, 58, 58, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 62, 54, 50, 62, 55, 52, 49, 54, 52, 49, 48, 50, 49, 48, 47,
+        /* Size 8 */
+        64, 70, 68, 63, 58, 55, 53, 52, 70, 67, 68, 65, 60, 57, 54, 53, 68, 68,
+        61, 59, 57, 55, 53, 52, 63, 65, 59, 56, 54, 53, 52, 51, 58, 60, 57, 54,
+        53, 52, 51, 51, 55, 57, 55, 53, 52, 51, 51, 50, 53, 54, 53, 52, 51, 51,
+        50, 50, 52, 53, 52, 51, 51, 50, 50, 50,
+        /* Size 16 */
+        64, 67, 70, 69, 68, 66, 63, 60, 58, 57, 55, 54, 53, 52, 52, 52, 67, 68,
+        69, 69, 68, 66, 64, 61, 59, 58, 56, 55, 54, 53, 52, 52, 70, 69, 67, 68,
+        68, 66, 65, 62, 60, 59, 57, 56, 54, 53, 53, 53, 69, 69, 68, 66, 65, 63,
+        62, 60, 59, 57, 56, 55, 54, 53, 52, 52, 68, 68, 68, 65, 61, 60, 59, 58,
+        57, 56, 55, 54, 53, 53, 52, 52, 66, 66, 66, 63, 60, 59, 57, 56, 56, 55,
+        54, 53, 53, 52, 52, 52, 63, 64, 65, 62, 59, 57, 56, 55, 54, 54, 53, 53,
+        52, 52, 51, 51, 60, 61, 62, 60, 58, 56, 55, 54, 54, 53, 52, 52, 52, 51,
+        51, 51, 58, 59, 60, 59, 57, 56, 54, 54, 53, 52, 52, 52, 51, 51, 51, 51,
+        57, 58, 59, 57, 56, 55, 54, 53, 52, 52, 51, 51, 51, 51, 50, 50, 55, 56,
+        57, 56, 55, 54, 53, 52, 52, 51, 51, 51, 51, 50, 50, 50, 54, 55, 56, 55,
+        54, 53, 53, 52, 52, 51, 51, 51, 50, 50, 50, 50, 53, 54, 54, 54, 53, 53,
+        52, 52, 51, 51, 51, 50, 50, 50, 50, 50, 52, 53, 53, 53, 53, 52, 52, 51,
+        51, 51, 50, 50, 50, 50, 50, 50, 52, 52, 53, 52, 52, 52, 51, 51, 51, 50,
+        50, 50, 50, 50, 50, 50, 52, 52, 53, 52, 52, 52, 51, 51, 51, 50, 50, 50,
+        50, 50, 50, 50,
+        /* Size 32 */
+        64, 66, 67, 69, 70, 70, 69, 69, 68, 67, 66, 64, 63, 62, 60, 59, 58, 57,
+        57, 56, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 52, 66, 67, 68, 69,
+        70, 69, 69, 69, 68, 67, 66, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 55,
+        54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 67, 68, 68, 68, 69, 69, 69, 68,
+        68, 67, 66, 65, 64, 63, 61, 60, 59, 58, 58, 57, 56, 55, 55, 54, 54, 53,
+        53, 52, 52, 52, 52, 52, 69, 69, 68, 68, 68, 68, 68, 68, 68, 67, 66, 65,
+        64, 63, 62, 61, 60, 59, 58, 57, 56, 56, 55, 55, 54, 54, 53, 53, 52, 52,
+        52, 52, 70, 70, 69, 68, 67, 67, 68, 68, 68, 67, 66, 66, 65, 64, 62, 61,
+        60, 59, 59, 58, 57, 56, 56, 55, 54, 54, 53, 53, 53, 53, 53, 53, 70, 69,
+        69, 68, 67, 67, 67, 67, 66, 66, 65, 64, 63, 62, 61, 60, 59, 59, 58, 57,
+        56, 56, 55, 55, 54, 54, 53, 53, 52, 52, 52, 52, 69, 69, 69, 68, 68, 67,
+        66, 65, 65, 64, 63, 63, 62, 61, 60, 59, 59, 58, 57, 57, 56, 55, 55, 54,
+        54, 53, 53, 53, 52, 52, 52, 52, 69, 69, 68, 68, 68, 67, 65, 64, 63, 62,
+        62, 61, 60, 60, 59, 58, 58, 57, 57, 56, 55, 55, 54, 54, 54, 53, 53, 53,
+        52, 52, 52, 52, 68, 68, 68, 68, 68, 66, 65, 63, 61, 61, 60, 60, 59, 58,
+        58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52,
+        67, 67, 67, 67, 67, 66, 64, 62, 61, 60, 59, 59, 58, 58, 57, 57, 56, 56,
+        55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 52, 66, 66, 66, 66,
+        66, 65, 63, 62, 60, 59, 59, 58, 57, 57, 56, 56, 56, 55, 55, 54, 54, 54,
+        53, 53, 53, 52, 52, 52, 52, 52, 52, 52, 64, 64, 65, 65, 66, 64, 63, 61,
+        60, 59, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52,
+        52, 52, 51, 51, 51, 51, 63, 63, 64, 64, 65, 63, 62, 60, 59, 58, 57, 57,
+        56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51,
+        51, 51, 62, 62, 63, 63, 64, 62, 61, 60, 58, 58, 57, 56, 55, 55, 55, 54,
+        54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 60, 61,
+        61, 62, 62, 61, 60, 59, 58, 57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53,
+        52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 59, 60, 60, 61, 61, 60,
+        59, 58, 57, 57, 56, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52,
+        51, 51, 51, 51, 51, 51, 51, 51, 58, 59, 59, 60, 60, 59, 59, 58, 57, 56,
+        56, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51,
+        51, 51, 51, 51, 57, 58, 58, 59, 59, 59, 58, 57, 56, 56, 55, 55, 54, 54,
+        53, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+        57, 57, 58, 58, 59, 58, 57, 57, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52,
+        52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 56, 56, 57, 57,
+        58, 57, 57, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51,
+        51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 55, 55, 56, 56, 57, 56, 56, 55,
+        55, 54, 54, 54, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50,
+        50, 50, 50, 50, 50, 50, 54, 55, 55, 56, 56, 56, 55, 55, 54, 54, 54, 53,
+        53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50,
+        50, 50, 54, 54, 55, 55, 56, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52,
+        52, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 53, 54,
+        54, 55, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51,
+        51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 53, 53, 54, 54, 54, 54,
+        54, 54, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 50, 50, 50,
+        50, 50, 50, 50, 50, 50, 50, 50, 53, 53, 53, 54, 54, 54, 53, 53, 53, 53,
+        52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50,
+        50, 50, 50, 50, 52, 53, 53, 53, 53, 53, 53, 53, 53, 52, 52, 52, 52, 52,
+        51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+        52, 52, 52, 53, 53, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51,
+        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 52, 52, 52, 52,
+        53, 52, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50,
+        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 52, 52, 52, 52, 53, 52, 52, 52,
+        52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50,
+        50, 50, 50, 50, 50, 50, 52, 52, 52, 52, 53, 52, 52, 52, 52, 52, 52, 51,
+        51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+        50, 50, 52, 52, 52, 52, 53, 52, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51,
+        51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50 },
+      { /* Intra matrices */
+        /* Size 4 */
+        80, 77, 67, 61, 77, 68, 63, 60, 67, 63, 60, 58, 61, 60, 58, 57,
+        /* Size 8 */
+        75, 83, 80, 73, 68, 64, 61, 60, 83, 79, 80, 76, 70, 66, 63, 61, 80, 80,
+        72, 69, 66, 64, 62, 60, 73, 76, 69, 65, 63, 61, 60, 59, 68, 70, 66, 63,
+        61, 60, 59, 58, 64, 66, 64, 61, 60, 59, 58, 58, 61, 63, 62, 60, 59, 58,
+        58, 57, 60, 61, 60, 59, 58, 58, 57, 57,
+        /* Size 16 */
+        76, 80, 84, 82, 81, 78, 74, 71, 68, 66, 64, 63, 62, 61, 60, 60, 80, 81,
+        82, 81, 81, 78, 75, 72, 70, 68, 66, 64, 63, 62, 61, 61, 84, 82, 80, 80,
+        81, 79, 76, 74, 71, 69, 67, 65, 64, 63, 61, 61, 82, 81, 80, 78, 77, 75,
+        73, 71, 69, 67, 65, 64, 63, 62, 61, 61, 81, 81, 81, 77, 72, 71, 69, 68,
+        67, 66, 64, 63, 62, 61, 61, 61, 78, 78, 79, 75, 71, 69, 67, 66, 65, 64,
+        63, 62, 62, 61, 60, 60, 74, 75, 76, 73, 69, 67, 66, 65, 64, 63, 62, 61,
+        61, 60, 60, 60, 71, 72, 74, 71, 68, 66, 65, 64, 63, 62, 61, 61, 60, 60,
+        59, 59, 68, 70, 71, 69, 67, 65, 64, 63, 62, 61, 60, 60, 60, 59, 59, 59,
+        66, 68, 69, 67, 66, 64, 63, 62, 61, 61, 60, 60, 59, 59, 59, 59, 64, 66,
+        67, 65, 64, 63, 62, 61, 60, 60, 59, 59, 59, 59, 58, 58, 63, 64, 65, 64,
+        63, 62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58, 62, 63, 64, 63, 62, 62,
+        61, 60, 60, 59, 59, 59, 58, 58, 58, 58, 61, 62, 63, 62, 61, 61, 60, 60,
+        59, 59, 59, 58, 58, 58, 58, 58, 60, 61, 61, 61, 61, 60, 60, 59, 59, 59,
+        58, 58, 58, 58, 58, 58, 60, 61, 61, 61, 61, 60, 60, 59, 59, 59, 58, 58,
+        58, 58, 58, 58,
+        /* Size 32 */
+        76, 78, 80, 82, 84, 83, 83, 82, 82, 80, 78, 76, 74, 73, 71, 70, 69, 68,
+        67, 66, 65, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 60, 78, 79, 81, 82,
+        83, 83, 82, 82, 82, 80, 78, 77, 75, 74, 72, 71, 69, 68, 67, 66, 65, 65,
+        64, 63, 63, 62, 62, 61, 61, 61, 61, 61, 80, 81, 81, 82, 82, 82, 82, 82,
+        81, 80, 79, 77, 76, 74, 73, 71, 70, 69, 68, 67, 66, 65, 64, 64, 63, 63,
+        62, 62, 61, 61, 61, 61, 82, 82, 82, 81, 81, 81, 81, 81, 81, 80, 79, 77,
+        76, 75, 73, 72, 71, 70, 69, 67, 66, 66, 65, 64, 63, 63, 62, 62, 61, 61,
+        61, 61, 84, 83, 82, 81, 80, 80, 81, 81, 81, 80, 79, 78, 77, 75, 74, 73,
+        71, 70, 69, 68, 67, 66, 65, 65, 64, 63, 63, 62, 62, 62, 62, 62, 83, 83,
+        82, 81, 80, 80, 80, 79, 79, 78, 77, 76, 75, 74, 73, 71, 70, 69, 68, 67,
+        66, 66, 65, 64, 64, 63, 63, 62, 62, 62, 62, 62, 83, 82, 82, 81, 81, 80,
+        79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 67, 66, 65, 65, 64,
+        63, 63, 62, 62, 61, 61, 61, 61, 82, 82, 82, 81, 81, 79, 78, 76, 75, 74,
+        73, 72, 71, 71, 70, 69, 68, 67, 67, 66, 65, 65, 64, 63, 63, 62, 62, 62,
+        61, 61, 61, 61, 82, 82, 81, 81, 81, 79, 77, 75, 73, 72, 71, 70, 70, 69,
+        68, 68, 67, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 61, 61, 61, 61, 61,
+        80, 80, 80, 80, 80, 78, 76, 74, 72, 71, 70, 70, 69, 68, 67, 67, 66, 66,
+        65, 65, 64, 64, 63, 63, 62, 62, 61, 61, 61, 61, 61, 61, 78, 78, 79, 79,
+        79, 77, 75, 73, 71, 70, 69, 69, 68, 67, 67, 66, 65, 65, 64, 64, 63, 63,
+        63, 62, 62, 62, 61, 61, 61, 61, 61, 61, 76, 77, 77, 77, 78, 76, 74, 72,
+        70, 70, 69, 68, 67, 66, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 61, 61,
+        61, 61, 60, 60, 60, 60, 74, 75, 76, 76, 77, 75, 73, 71, 70, 69, 68, 67,
+        66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60,
+        60, 60, 73, 74, 74, 75, 75, 74, 72, 71, 69, 68, 67, 66, 65, 65, 64, 64,
+        63, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 71, 72,
+        73, 73, 74, 73, 71, 70, 68, 67, 67, 66, 65, 64, 64, 63, 63, 63, 62, 62,
+        62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 70, 71, 71, 72, 73, 71,
+        70, 69, 68, 67, 66, 65, 64, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60,
+        60, 60, 60, 60, 59, 59, 59, 59, 69, 69, 70, 71, 71, 70, 69, 68, 67, 66,
+        65, 65, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 59,
+        59, 59, 59, 59, 68, 68, 69, 70, 70, 69, 68, 67, 66, 66, 65, 64, 63, 63,
+        63, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+        67, 67, 68, 69, 69, 68, 67, 67, 66, 65, 64, 64, 63, 63, 62, 62, 61, 61,
+        61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 66, 66, 67, 67,
+        68, 67, 67, 66, 65, 65, 64, 63, 63, 62, 62, 61, 61, 61, 61, 60, 60, 60,
+        60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 65, 65, 66, 66, 67, 66, 66, 65,
+        65, 64, 63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 64, 65, 65, 66, 66, 66, 65, 65, 64, 64, 63, 63,
+        62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 58, 58,
+        58, 58, 63, 64, 64, 65, 65, 65, 65, 64, 64, 63, 63, 62, 62, 61, 61, 61,
+        60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 63, 63,
+        64, 64, 65, 64, 64, 63, 63, 63, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59,
+        59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 62, 63, 63, 63, 64, 64,
+        63, 63, 63, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59,
+        59, 58, 58, 58, 58, 58, 58, 58, 62, 62, 63, 63, 63, 63, 63, 62, 62, 62,
+        62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58,
+        58, 58, 58, 58, 61, 62, 62, 62, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60,
+        60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+        61, 61, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 59, 59,
+        59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 60, 61, 61, 61,
+        62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 58,
+        58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 60, 61, 61, 61, 62, 62, 61, 61,
+        61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58,
+        58, 58, 58, 58, 58, 58, 60, 61, 61, 61, 62, 62, 61, 61, 61, 61, 61, 60,
+        60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+        58, 58, 60, 61, 61, 61, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 59,
+        59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 56, 54, 52, 56, 53, 52, 51, 54, 52, 50, 49, 52, 51, 49, 48,
+        /* Size 8 */
+        64, 69, 57, 56, 55, 54, 53, 52, 69, 61, 57, 58, 58, 57, 55, 53, 57, 57,
+        55, 55, 55, 55, 54, 53, 56, 58, 55, 54, 53, 53, 52, 51, 55, 58, 55, 53,
+        52, 51, 51, 50, 54, 57, 55, 53, 51, 51, 50, 50, 53, 55, 54, 52, 51, 50,
+        49, 49, 52, 53, 53, 51, 50, 50, 49, 49,
+        /* Size 16 */
+        64, 67, 69, 63, 57, 57, 56, 56, 55, 55, 54, 53, 53, 52, 52, 52, 67, 66,
+        65, 61, 57, 57, 57, 57, 57, 56, 55, 55, 54, 53, 52, 52, 69, 65, 61, 59,
+        57, 58, 58, 58, 58, 57, 57, 56, 55, 54, 53, 53, 63, 61, 59, 57, 56, 56,
+        57, 57, 57, 56, 56, 55, 54, 54, 53, 53, 57, 57, 57, 56, 55, 55, 55, 55,
+        55, 55, 55, 54, 54, 53, 53, 53, 57, 57, 58, 56, 55, 55, 54, 54, 54, 54,
+        54, 53, 53, 52, 52, 52, 56, 57, 58, 57, 55, 54, 54, 53, 53, 53, 53, 52,
+        52, 52, 51, 51, 56, 57, 58, 57, 55, 54, 53, 53, 53, 52, 52, 52, 52, 51,
+        51, 51, 55, 57, 58, 57, 55, 54, 53, 53, 52, 52, 51, 51, 51, 51, 50, 50,
+        55, 56, 57, 56, 55, 54, 53, 52, 52, 51, 51, 51, 51, 50, 50, 50, 54, 55,
+        57, 56, 55, 54, 53, 52, 51, 51, 51, 50, 50, 50, 50, 50, 53, 55, 56, 55,
+        54, 53, 52, 52, 51, 51, 50, 50, 50, 50, 49, 49, 53, 54, 55, 54, 54, 53,
+        52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 52, 53, 54, 54, 53, 52, 52, 51,
+        51, 50, 50, 50, 49, 49, 49, 49, 52, 52, 53, 53, 53, 52, 51, 51, 50, 50,
+        50, 49, 49, 49, 49, 49, 52, 52, 53, 53, 53, 52, 51, 51, 50, 50, 50, 49,
+        49, 49, 49, 49,
+        /* Size 32 */
+        64, 65, 67, 68, 69, 66, 63, 60, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55,
+        55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 52, 52, 65, 66, 66, 67,
+        67, 65, 62, 60, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 54,
+        54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 67, 66, 66, 65, 65, 63, 61, 59,
+        57, 57, 57, 57, 57, 57, 57, 57, 57, 56, 56, 56, 55, 55, 55, 54, 54, 53,
+        53, 53, 52, 52, 52, 52, 68, 67, 65, 64, 63, 61, 60, 58, 57, 57, 57, 58,
+        58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53,
+        53, 53, 69, 67, 65, 63, 61, 60, 59, 58, 57, 57, 58, 58, 58, 58, 58, 58,
+        58, 58, 57, 57, 57, 56, 56, 55, 55, 55, 54, 54, 53, 53, 53, 53, 66, 65,
+        63, 61, 60, 59, 58, 57, 56, 57, 57, 57, 58, 58, 57, 57, 57, 57, 57, 56,
+        56, 56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 53, 63, 62, 61, 60, 59, 58,
+        57, 57, 56, 56, 56, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55,
+        54, 54, 54, 53, 53, 53, 53, 53, 60, 60, 59, 58, 58, 57, 57, 56, 55, 55,
+        56, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 53, 53,
+        53, 53, 53, 53, 57, 57, 57, 57, 57, 56, 56, 55, 55, 55, 55, 55, 55, 55,
+        55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53,
+        57, 57, 57, 57, 57, 57, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
+        54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 52, 52, 52, 52, 57, 57, 57, 57,
+        58, 57, 56, 56, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 53,
+        53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 57, 57, 57, 58, 58, 57, 57, 56,
+        55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53, 52,
+        52, 52, 52, 52, 52, 52, 56, 57, 57, 58, 58, 58, 57, 56, 55, 55, 54, 54,
+        54, 54, 53, 53, 53, 53, 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 51, 51,
+        51, 51, 56, 57, 57, 58, 58, 58, 57, 56, 55, 55, 54, 54, 54, 53, 53, 53,
+        53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 56, 56,
+        57, 58, 58, 57, 57, 56, 55, 55, 54, 54, 53, 53, 53, 53, 53, 52, 52, 52,
+        52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 56, 56, 57, 57, 58, 57,
+        57, 56, 55, 55, 54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 51, 51,
+        51, 51, 51, 51, 51, 51, 51, 51, 55, 56, 57, 57, 58, 57, 57, 56, 55, 55,
+        54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 51,
+        50, 50, 50, 50, 55, 56, 56, 57, 58, 57, 56, 56, 55, 55, 54, 54, 53, 53,
+        52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50,
+        55, 55, 56, 57, 57, 57, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52,
+        51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 54, 55, 56, 56,
+        57, 56, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51,
+        51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 54, 55, 55, 56, 57, 56, 56, 55,
+        55, 54, 54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50,
+        50, 50, 50, 50, 50, 50, 54, 54, 55, 56, 56, 56, 55, 55, 54, 54, 53, 53,
+        53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+        50, 50, 53, 54, 55, 55, 56, 55, 55, 55, 54, 54, 53, 53, 52, 52, 52, 51,
+        51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 53, 54,
+        54, 55, 55, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50,
+        50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 53, 53, 54, 54, 55, 55,
+        54, 54, 54, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50,
+        49, 49, 49, 49, 49, 49, 49, 49, 52, 53, 53, 54, 55, 54, 54, 54, 53, 53,
+        53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49,
+        49, 49, 49, 49, 52, 53, 53, 54, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52,
+        51, 51, 51, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+        52, 52, 53, 53, 54, 54, 53, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50,
+        50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 52, 52, 52, 53,
+        53, 53, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50,
+        49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 52, 52, 52, 53, 53, 53, 53, 53,
+        53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49,
+        49, 49, 49, 49, 49, 49, 52, 52, 52, 53, 53, 53, 53, 53, 53, 52, 52, 52,
+        51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 49,
+        49, 49, 52, 52, 52, 53, 53, 53, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51,
+        50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49 },
+      { /* Intra matrices */
+        /* Size 4 */
+        79, 68, 67, 63, 68, 65, 64, 62, 67, 64, 61, 60, 63, 62, 60, 58,
+        /* Size 8 */
+        77, 84, 68, 67, 66, 64, 63, 61, 84, 73, 68, 70, 69, 67, 65, 64, 68, 68,
+        65, 66, 66, 65, 64, 62, 67, 70, 66, 64, 63, 63, 62, 61, 66, 69, 66, 63,
+        62, 61, 60, 60, 64, 67, 65, 63, 61, 60, 59, 59, 63, 65, 64, 62, 60, 59,
+        59, 58, 61, 64, 62, 61, 60, 59, 58, 58,
+        /* Size 16 */
+        77, 81, 84, 76, 69, 68, 68, 67, 67, 66, 65, 64, 63, 62, 62, 62, 81, 80,
+        79, 74, 69, 69, 69, 69, 68, 67, 66, 65, 64, 64, 63, 63, 84, 79, 74, 71,
+        68, 69, 70, 70, 70, 69, 68, 67, 66, 65, 64, 64, 76, 74, 71, 69, 67, 68,
+        68, 68, 68, 67, 67, 66, 65, 64, 63, 63, 69, 69, 68, 67, 66, 66, 66, 66,
+        66, 66, 66, 65, 64, 64, 63, 63, 68, 69, 69, 68, 66, 66, 65, 65, 65, 65,
+        64, 64, 63, 63, 62, 62, 68, 69, 70, 68, 66, 65, 64, 64, 64, 63, 63, 63,
+        62, 62, 61, 61, 67, 69, 70, 68, 66, 65, 64, 64, 63, 63, 62, 62, 62, 61,
+        61, 61, 67, 68, 70, 68, 66, 65, 64, 63, 62, 62, 61, 61, 61, 61, 60, 60,
+        66, 67, 69, 67, 66, 65, 63, 63, 62, 61, 61, 61, 60, 60, 60, 60, 65, 66,
+        68, 67, 66, 64, 63, 62, 61, 61, 60, 60, 60, 59, 59, 59, 64, 65, 67, 66,
+        65, 64, 63, 62, 61, 61, 60, 60, 59, 59, 59, 59, 63, 64, 66, 65, 64, 63,
+        62, 62, 61, 60, 60, 59, 59, 59, 58, 58, 62, 64, 65, 64, 64, 63, 62, 61,
+        61, 60, 59, 59, 59, 58, 58, 58, 62, 63, 64, 63, 63, 62, 61, 61, 60, 60,
+        59, 59, 58, 58, 58, 58, 62, 63, 64, 63, 63, 62, 61, 61, 60, 60, 59, 59,
+        58, 58, 58, 58,
+        /* Size 32 */
+        78, 79, 81, 83, 84, 81, 77, 73, 69, 69, 68, 68, 68, 68, 67, 67, 67, 66,
+        66, 65, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 79, 80, 81, 81,
+        82, 79, 75, 72, 69, 69, 69, 69, 69, 68, 68, 68, 68, 67, 67, 66, 66, 65,
+        65, 64, 64, 64, 63, 63, 62, 62, 62, 62, 81, 81, 80, 80, 79, 77, 74, 71,
+        69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 67, 67, 67, 66, 66, 65, 65, 64,
+        64, 63, 63, 63, 63, 63, 83, 81, 80, 78, 76, 75, 73, 71, 69, 69, 69, 70,
+        70, 70, 70, 69, 69, 69, 68, 68, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64,
+        64, 64, 84, 82, 79, 76, 74, 73, 71, 70, 69, 69, 70, 70, 71, 70, 70, 70,
+        70, 70, 69, 69, 68, 68, 67, 67, 66, 66, 65, 65, 64, 64, 64, 64, 81, 79,
+        77, 75, 73, 71, 70, 69, 68, 68, 69, 69, 70, 69, 69, 69, 69, 69, 68, 68,
+        68, 67, 67, 66, 66, 65, 65, 64, 64, 64, 64, 64, 77, 75, 74, 73, 71, 70,
+        69, 68, 67, 68, 68, 68, 69, 68, 68, 68, 68, 68, 68, 67, 67, 67, 66, 66,
+        65, 65, 64, 64, 64, 64, 64, 64, 73, 72, 71, 71, 70, 69, 68, 67, 67, 67,
+        67, 67, 68, 67, 67, 67, 67, 67, 67, 67, 66, 66, 66, 65, 65, 65, 64, 64,
+        63, 63, 63, 63, 69, 69, 69, 69, 69, 68, 67, 67, 66, 66, 66, 66, 66, 67,
+        67, 67, 67, 66, 66, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 63, 63,
+        69, 69, 69, 69, 69, 68, 68, 67, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+        66, 65, 65, 65, 65, 64, 64, 64, 63, 63, 63, 63, 63, 63, 68, 69, 69, 69,
+        70, 69, 68, 67, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 64,
+        64, 64, 64, 63, 63, 63, 62, 62, 62, 62, 68, 69, 69, 70, 70, 69, 68, 67,
+        66, 66, 66, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+        63, 62, 62, 62, 62, 62, 68, 69, 69, 70, 71, 70, 69, 68, 66, 66, 66, 65,
+        65, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62,
+        62, 62, 68, 68, 69, 70, 70, 69, 68, 67, 67, 66, 65, 65, 64, 64, 64, 64,
+        64, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 67, 68,
+        69, 70, 70, 69, 68, 67, 67, 66, 65, 65, 64, 64, 64, 63, 63, 63, 63, 63,
+        62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 67, 68, 69, 69, 70, 69,
+        68, 67, 67, 66, 65, 65, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62,
+        61, 61, 61, 61, 61, 61, 61, 61, 67, 68, 68, 69, 70, 69, 68, 67, 67, 66,
+        65, 65, 64, 64, 63, 63, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61,
+        60, 60, 60, 60, 66, 67, 68, 69, 70, 69, 68, 67, 66, 66, 65, 64, 64, 63,
+        63, 63, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+        66, 67, 67, 68, 69, 68, 68, 67, 66, 66, 65, 64, 64, 63, 63, 62, 62, 62,
+        62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 65, 66, 67, 68,
+        69, 68, 67, 67, 66, 65, 65, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61, 61,
+        61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 65, 66, 67, 67, 68, 68, 67, 66,
+        66, 65, 65, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+        60, 60, 59, 59, 59, 59, 65, 65, 66, 67, 68, 67, 67, 66, 65, 65, 64, 64,
+        63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+        59, 59, 64, 65, 66, 66, 67, 67, 66, 66, 65, 65, 64, 63, 63, 63, 62, 62,
+        61, 61, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 64, 64,
+        65, 66, 67, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 62, 61, 61, 61, 60,
+        60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 63, 64, 65, 65, 66, 66,
+        65, 65, 64, 64, 64, 63, 63, 62, 62, 61, 61, 61, 60, 60, 60, 60, 60, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 63, 64, 64, 65, 66, 65, 65, 65, 64, 64,
+        63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 63, 63, 64, 65, 65, 65, 64, 64, 64, 63, 63, 63, 62, 62,
+        61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58,
+        62, 63, 63, 64, 65, 64, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61, 60,
+        60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 62, 62, 63, 64,
+        64, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59,
+        59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 62, 62, 63, 64, 64, 64, 64, 63,
+        63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+        58, 58, 58, 58, 58, 58, 62, 62, 63, 64, 64, 64, 64, 63, 63, 63, 62, 62,
+        62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58,
+        58, 58, 62, 62, 63, 64, 64, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61,
+        60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 63, 57, 54, 63, 58, 55, 54, 57, 55, 53, 53, 54, 54, 53, 52,
+        /* Size 8 */
+        64, 68, 67, 63, 60, 58, 56, 55, 68, 66, 67, 64, 61, 59, 57, 56, 67, 67,
+        62, 61, 59, 58, 57, 56, 63, 64, 61, 58, 57, 56, 56, 55, 60, 61, 59, 57,
+        56, 56, 55, 55, 58, 59, 58, 56, 56, 55, 55, 54, 56, 57, 57, 56, 55, 55,
+        54, 54, 55, 56, 56, 55, 55, 54, 54, 54,
+        /* Size 16 */
+        64, 66, 68, 68, 67, 65, 63, 62, 60, 59, 58, 57, 56, 56, 55, 55, 66, 67,
+        67, 67, 67, 65, 64, 62, 61, 60, 58, 58, 57, 56, 56, 56, 68, 67, 66, 66,
+        67, 66, 64, 63, 61, 60, 59, 58, 57, 57, 56, 56, 68, 67, 66, 65, 65, 63,
+        62, 61, 60, 59, 58, 58, 57, 56, 56, 56, 67, 67, 67, 65, 62, 61, 61, 60,
+        59, 58, 58, 57, 57, 56, 56, 56, 65, 65, 66, 63, 61, 60, 59, 59, 58, 58,
+        57, 57, 56, 56, 56, 56, 63, 64, 64, 62, 61, 59, 58, 58, 57, 57, 56, 56,
+        56, 56, 55, 55, 62, 62, 63, 61, 60, 59, 58, 57, 57, 56, 56, 56, 55, 55,
+        55, 55, 60, 61, 61, 60, 59, 58, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55,
+        59, 60, 60, 59, 58, 58, 57, 56, 56, 56, 55, 55, 55, 55, 55, 55, 58, 58,
+        59, 58, 58, 57, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 57, 58, 58, 58,
+        57, 57, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 56, 57, 57, 57, 57, 56,
+        56, 55, 55, 55, 55, 55, 54, 54, 54, 54, 56, 56, 57, 56, 56, 56, 56, 55,
+        55, 55, 55, 54, 54, 54, 54, 54, 55, 56, 56, 56, 56, 56, 55, 55, 55, 55,
+        54, 54, 54, 54, 54, 54, 55, 56, 56, 56, 56, 56, 55, 55, 55, 55, 54, 54,
+        54, 54, 54, 54,
+        /* Size 32 */
+        64, 65, 66, 67, 68, 68, 68, 67, 67, 66, 65, 64, 63, 62, 62, 61, 60, 59,
+        59, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 65, 66, 66, 67,
+        68, 68, 67, 67, 67, 66, 65, 64, 63, 63, 62, 61, 60, 60, 59, 59, 58, 58,
+        57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 66, 66, 67, 67, 67, 67, 67, 67,
+        67, 66, 65, 65, 64, 63, 62, 61, 61, 60, 60, 59, 58, 58, 58, 57, 57, 57,
+        56, 56, 56, 56, 56, 56, 67, 67, 67, 67, 67, 67, 67, 67, 67, 66, 66, 65,
+        64, 63, 63, 62, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56,
+        56, 56, 68, 68, 67, 67, 66, 66, 66, 67, 67, 66, 66, 65, 64, 64, 63, 62,
+        61, 61, 60, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 68, 68,
+        67, 67, 66, 66, 66, 66, 66, 65, 65, 64, 63, 63, 62, 62, 61, 60, 60, 59,
+        59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 68, 67, 67, 67, 66, 66,
+        65, 65, 65, 64, 63, 63, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 58, 57,
+        57, 57, 56, 56, 56, 56, 56, 56, 67, 67, 67, 67, 67, 66, 65, 64, 63, 63,
+        62, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56,
+        56, 56, 56, 56, 67, 67, 67, 67, 67, 66, 65, 63, 62, 62, 61, 61, 61, 60,
+        60, 59, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56,
+        66, 66, 66, 66, 66, 65, 64, 63, 62, 61, 61, 60, 60, 60, 59, 59, 59, 58,
+        58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 65, 65, 65, 66,
+        66, 65, 63, 62, 61, 61, 60, 60, 59, 59, 59, 59, 58, 58, 58, 57, 57, 57,
+        57, 56, 56, 56, 56, 56, 56, 56, 56, 56, 64, 64, 65, 65, 65, 64, 63, 62,
+        61, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56,
+        56, 56, 55, 55, 55, 55, 63, 63, 64, 64, 64, 63, 62, 62, 61, 60, 59, 59,
+        58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55,
+        55, 55, 62, 63, 63, 63, 64, 63, 62, 61, 60, 60, 59, 59, 58, 58, 58, 57,
+        57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 62, 62,
+        62, 63, 63, 62, 61, 61, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56,
+        56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 61, 61, 61, 62, 62, 62,
+        61, 60, 59, 59, 59, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 55,
+        55, 55, 55, 55, 55, 55, 55, 55, 60, 60, 61, 61, 61, 61, 60, 60, 59, 59,
+        58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55,
+        55, 55, 55, 55, 59, 60, 60, 60, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57,
+        57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
+        59, 59, 60, 60, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56,
+        56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 58, 59, 59, 59,
+        60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 55, 55, 55,
+        55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 58, 58, 58, 59, 59, 59, 58, 58,
+        58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55,
+        55, 55, 54, 54, 54, 54, 57, 58, 58, 58, 59, 58, 58, 58, 57, 57, 57, 57,
+        56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54,
+        54, 54, 57, 57, 58, 58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56,
+        55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 57, 57,
+        57, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55,
+        55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 56, 57, 57, 57, 57, 57,
+        57, 57, 57, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54,
+        54, 54, 54, 54, 54, 54, 54, 54, 56, 56, 57, 57, 57, 57, 57, 57, 56, 56,
+        56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54,
+        54, 54, 54, 54, 56, 56, 56, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 55,
+        55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
+        56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55,
+        55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 55, 56, 56, 56,
+        56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54,
+        54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 55, 56, 56, 56, 56, 56, 56, 56,
+        56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54,
+        54, 54, 54, 54, 54, 54, 55, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 55,
+        55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
+        54, 54, 55, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55,
+        55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54 },
+      { /* Intra matrices */
+        /* Size 4 */
+        74, 72, 66, 62, 72, 67, 63, 61, 66, 63, 61, 60, 62, 61, 60, 59,
+        /* Size 8 */
+        71, 76, 75, 70, 66, 64, 62, 61, 76, 73, 74, 71, 68, 65, 63, 62, 75, 74,
+        69, 67, 65, 64, 62, 61, 70, 71, 67, 64, 63, 62, 61, 61, 66, 68, 65, 63,
+        62, 61, 61, 60, 64, 65, 64, 62, 61, 60, 60, 60, 62, 63, 62, 61, 61, 60,
+        60, 59, 61, 62, 61, 61, 60, 60, 59, 59,
+        /* Size 16 */
+        71, 74, 77, 76, 75, 73, 70, 68, 67, 65, 64, 63, 62, 62, 61, 61, 74, 75,
+        75, 75, 75, 73, 71, 69, 67, 66, 65, 64, 63, 62, 62, 62, 77, 75, 74, 74,
+        75, 73, 72, 70, 68, 67, 66, 65, 64, 63, 62, 62, 76, 75, 74, 73, 72, 71,
+        70, 68, 67, 66, 65, 64, 63, 63, 62, 62, 75, 75, 75, 72, 69, 68, 67, 66,
+        66, 65, 64, 63, 63, 62, 62, 62, 73, 73, 73, 71, 68, 67, 66, 65, 65, 64,
+        63, 63, 62, 62, 61, 61, 70, 71, 72, 70, 67, 66, 65, 64, 63, 63, 63, 62,
+        62, 61, 61, 61, 68, 69, 70, 68, 66, 65, 64, 64, 63, 62, 62, 62, 61, 61,
+        61, 61, 67, 67, 68, 67, 66, 65, 63, 63, 62, 62, 62, 61, 61, 61, 61, 61,
+        65, 66, 67, 66, 65, 64, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 64, 65,
+        66, 65, 64, 63, 63, 62, 62, 61, 61, 61, 60, 60, 60, 60, 63, 64, 65, 64,
+        63, 63, 62, 62, 61, 61, 61, 60, 60, 60, 60, 60, 62, 63, 64, 63, 63, 62,
+        62, 61, 61, 61, 60, 60, 60, 60, 60, 60, 62, 62, 63, 63, 62, 62, 61, 61,
+        61, 61, 60, 60, 60, 60, 60, 60, 61, 62, 62, 62, 62, 61, 61, 61, 61, 60,
+        60, 60, 60, 60, 60, 60, 61, 62, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60,
+        60, 60, 60, 60,
+        /* Size 32 */
+        72, 73, 74, 76, 77, 76, 76, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66,
+        66, 65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 73, 74, 75, 75,
+        76, 76, 76, 75, 75, 74, 73, 72, 71, 70, 69, 68, 67, 67, 66, 65, 65, 64,
+        64, 63, 63, 63, 62, 62, 62, 62, 62, 62, 74, 75, 75, 75, 76, 75, 75, 75,
+        75, 74, 73, 72, 71, 70, 70, 69, 68, 67, 66, 66, 65, 65, 64, 64, 63, 63,
+        63, 62, 62, 62, 62, 62, 76, 75, 75, 75, 75, 75, 75, 75, 75, 74, 73, 73,
+        72, 71, 70, 69, 68, 67, 67, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62,
+        62, 62, 77, 76, 76, 75, 74, 74, 75, 75, 75, 74, 74, 73, 72, 71, 70, 69,
+        69, 68, 67, 66, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 76, 76,
+        75, 75, 74, 74, 74, 74, 74, 73, 72, 72, 71, 70, 69, 69, 68, 67, 67, 66,
+        65, 65, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62, 76, 76, 75, 75, 75, 74,
+        73, 73, 72, 72, 71, 70, 70, 69, 69, 68, 67, 67, 66, 66, 65, 65, 64, 64,
+        63, 63, 63, 62, 62, 62, 62, 62, 76, 75, 75, 75, 75, 74, 73, 72, 71, 70,
+        70, 69, 69, 68, 68, 67, 66, 66, 66, 65, 65, 64, 64, 63, 63, 63, 63, 62,
+        62, 62, 62, 62, 75, 75, 75, 75, 75, 74, 72, 71, 69, 69, 68, 68, 67, 67,
+        67, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 62,
+        74, 74, 74, 74, 74, 73, 72, 70, 69, 68, 68, 67, 67, 66, 66, 66, 65, 65,
+        65, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 73, 73, 73, 73,
+        74, 72, 71, 70, 68, 68, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63,
+        63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 72, 72, 72, 73, 73, 72, 70, 69,
+        68, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62,
+        62, 62, 61, 61, 61, 61, 71, 71, 71, 72, 72, 71, 70, 69, 67, 67, 66, 66,
+        65, 65, 64, 64, 64, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61,
+        61, 61, 70, 70, 70, 71, 71, 70, 69, 68, 67, 66, 66, 65, 65, 64, 64, 64,
+        63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 69, 69,
+        70, 70, 70, 69, 69, 68, 67, 66, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62,
+        62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 68, 68, 69, 69, 69, 69,
+        68, 67, 66, 66, 65, 65, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62,
+        61, 61, 61, 61, 61, 61, 61, 61, 67, 67, 68, 68, 69, 68, 67, 66, 66, 65,
+        65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61,
+        61, 61, 61, 61, 66, 67, 67, 67, 68, 67, 67, 66, 65, 65, 64, 64, 63, 63,
+        63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+        66, 66, 66, 67, 67, 67, 66, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62,
+        62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 65, 65, 66, 66,
+        66, 66, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61,
+        61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 64, 65, 65, 65, 66, 65, 65, 65,
+        64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61,
+        60, 60, 60, 60, 60, 60, 64, 64, 65, 65, 65, 65, 65, 64, 64, 64, 63, 63,
+        63, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+        60, 60, 63, 64, 64, 64, 65, 64, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62,
+        61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 63, 63,
+        64, 64, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61,
+        61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 63, 63, 63, 63, 64, 64,
+        63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60,
+        60, 60, 60, 60, 60, 60, 60, 60, 62, 63, 63, 63, 63, 63, 63, 63, 63, 62,
+        62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60,
+        60, 60, 60, 60, 62, 62, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 61,
+        61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+        62, 62, 62, 62, 63, 63, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61,
+        61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 62, 62, 62, 62,
+        62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 62, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 61,
+        61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+        60, 60, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61,
+        61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 58, 57, 55, 58, 56, 56, 55, 57, 56, 54, 53, 55, 55, 53, 53,
+        /* Size 8 */
+        64, 68, 59, 59, 58, 57, 56, 55, 68, 62, 59, 60, 60, 59, 58, 57, 59, 59,
+        58, 58, 58, 57, 57, 56, 59, 60, 58, 57, 56, 56, 56, 55, 58, 60, 58, 56,
+        56, 55, 55, 55, 57, 59, 57, 56, 55, 55, 54, 54, 56, 58, 57, 56, 55, 54,
+        54, 54, 55, 57, 56, 55, 55, 54, 54, 53,
+        /* Size 16 */
+        64, 66, 68, 63, 59, 59, 59, 58, 58, 58, 57, 57, 56, 56, 55, 55, 66, 65,
+        65, 62, 59, 59, 59, 59, 59, 58, 58, 57, 57, 56, 56, 56, 68, 65, 62, 60,
+        59, 60, 60, 60, 60, 59, 59, 58, 58, 57, 57, 57, 63, 62, 60, 59, 58, 59,
+        59, 59, 59, 58, 58, 58, 57, 57, 56, 56, 59, 59, 59, 58, 58, 58, 58, 58,
+        58, 58, 57, 57, 57, 56, 56, 56, 59, 59, 60, 59, 58, 58, 57, 57, 57, 57,
+        57, 57, 56, 56, 56, 56, 59, 59, 60, 59, 58, 57, 57, 57, 56, 56, 56, 56,
+        56, 55, 55, 55, 58, 59, 60, 59, 58, 57, 57, 56, 56, 56, 56, 55, 55, 55,
+        55, 55, 58, 59, 60, 59, 58, 57, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55,
+        58, 58, 59, 58, 58, 57, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 57, 58,
+        59, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 54, 54, 57, 57, 58, 58,
+        57, 57, 56, 55, 55, 55, 54, 54, 54, 54, 54, 54, 56, 57, 58, 57, 57, 56,
+        56, 55, 55, 55, 54, 54, 54, 54, 54, 54, 56, 56, 57, 57, 56, 56, 55, 55,
+        55, 54, 54, 54, 54, 54, 53, 53, 55, 56, 57, 56, 56, 56, 55, 55, 55, 54,
+        54, 54, 54, 53, 53, 53, 55, 56, 57, 56, 56, 56, 55, 55, 55, 54, 54, 54,
+        54, 53, 53, 53,
+        /* Size 32 */
+        64, 65, 66, 67, 68, 66, 63, 61, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58,
+        58, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 65, 65, 66, 66,
+        66, 64, 63, 61, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 57, 57,
+        57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 66, 66, 65, 65, 65, 63, 62, 61,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57,
+        56, 56, 56, 56, 56, 56, 67, 66, 65, 64, 63, 62, 61, 60, 59, 59, 59, 60,
+        60, 60, 60, 59, 59, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 56, 56,
+        56, 56, 68, 66, 65, 63, 62, 61, 60, 60, 59, 59, 60, 60, 60, 60, 60, 60,
+        60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 57, 66, 64,
+        63, 62, 61, 61, 60, 59, 59, 59, 59, 59, 60, 59, 59, 59, 59, 59, 59, 59,
+        58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 63, 63, 62, 61, 60, 60,
+        59, 59, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 57,
+        57, 57, 57, 57, 56, 56, 56, 56, 61, 61, 61, 60, 60, 59, 59, 58, 58, 58,
+        58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 57, 57, 57, 57, 57, 56,
+        56, 56, 56, 56, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58,
+        58, 58, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56,
+        59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 57,
+        57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 59, 59, 59, 59,
+        60, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
+        57, 56, 56, 56, 56, 56, 56, 56, 56, 56, 59, 59, 59, 60, 60, 59, 59, 58,
+        58, 58, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56,
+        56, 56, 55, 55, 55, 55, 59, 59, 59, 60, 60, 60, 59, 58, 58, 58, 57, 57,
+        57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55,
+        55, 55, 58, 59, 59, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56,
+        56, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 58, 59,
+        59, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56,
+        56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 58, 59, 59, 59, 60, 59,
+        59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55,
+        55, 55, 55, 55, 55, 55, 55, 55, 58, 58, 59, 59, 60, 59, 59, 58, 58, 58,
+        57, 57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
+        55, 55, 55, 55, 58, 58, 59, 59, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56,
+        56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54,
+        58, 58, 58, 59, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55,
+        55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 57, 58, 58, 59,
+        59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55,
+        55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 57, 57, 58, 58, 59, 58, 58, 58,
+        57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54,
+        54, 54, 54, 54, 54, 54, 57, 57, 58, 58, 58, 58, 58, 58, 57, 57, 57, 56,
+        56, 56, 56, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54,
+        54, 54, 57, 57, 57, 58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 55, 55,
+        55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 56, 57,
+        57, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55,
+        54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 56, 56, 57, 57, 58, 57,
+        57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54,
+        54, 54, 54, 54, 54, 54, 54, 54, 56, 56, 57, 57, 57, 57, 57, 57, 57, 56,
+        56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
+        53, 53, 53, 53, 56, 56, 56, 57, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55,
+        55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53,
+        55, 56, 56, 57, 57, 57, 57, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 54,
+        54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 55, 56, 56, 56,
+        57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54,
+        54, 54, 54, 53, 53, 53, 53, 53, 53, 53, 55, 56, 56, 56, 57, 56, 56, 56,
+        56, 56, 56, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 53,
+        53, 53, 53, 53, 53, 53, 55, 56, 56, 56, 57, 56, 56, 56, 56, 56, 56, 55,
+        55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53,
+        53, 53, 55, 56, 56, 56, 57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55,
+        55, 54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53 },
+      { /* Intra matrices */
+        /* Size 4 */
+        74, 67, 66, 63, 67, 65, 64, 63, 66, 64, 62, 61, 63, 63, 61, 60,
+        /* Size 8 */
+        72, 77, 67, 66, 65, 64, 63, 62, 77, 70, 66, 68, 67, 66, 65, 64, 67, 66,
+        65, 65, 65, 65, 64, 63, 66, 68, 65, 64, 63, 63, 63, 62, 65, 67, 65, 63,
+        62, 62, 62, 61, 64, 66, 65, 63, 62, 61, 61, 60, 63, 65, 64, 63, 62, 61,
+        60, 60, 62, 64, 63, 62, 61, 60, 60, 60,
+        /* Size 16 */
+        73, 75, 77, 72, 67, 67, 66, 66, 66, 65, 64, 64, 63, 63, 62, 62, 75, 74,
+        74, 70, 67, 67, 67, 67, 67, 66, 65, 65, 64, 64, 63, 63, 77, 74, 70, 69,
+        67, 67, 68, 68, 68, 67, 67, 66, 65, 65, 64, 64, 72, 70, 69, 67, 66, 66,
+        67, 67, 67, 66, 66, 65, 65, 64, 64, 64, 67, 67, 67, 66, 65, 65, 65, 65,
+        65, 65, 65, 64, 64, 64, 63, 63, 67, 67, 67, 66, 65, 65, 65, 65, 65, 64,
+        64, 64, 63, 63, 63, 63, 66, 67, 68, 67, 65, 65, 64, 64, 64, 64, 63, 63,
+        63, 63, 62, 62, 66, 67, 68, 67, 65, 65, 64, 64, 63, 63, 63, 63, 62, 62,
+        62, 62, 66, 67, 68, 67, 65, 65, 64, 63, 63, 62, 62, 62, 62, 62, 61, 61,
+        65, 66, 67, 66, 65, 64, 64, 63, 62, 62, 62, 62, 61, 61, 61, 61, 64, 65,
+        67, 66, 65, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 61, 64, 65, 66, 65,
+        64, 64, 63, 63, 62, 62, 61, 61, 61, 61, 60, 60, 63, 64, 65, 65, 64, 63,
+        63, 62, 62, 61, 61, 61, 61, 60, 60, 60, 63, 64, 65, 64, 64, 63, 63, 62,
+        62, 61, 61, 61, 60, 60, 60, 60, 62, 63, 64, 64, 63, 63, 62, 62, 61, 61,
+        61, 60, 60, 60, 60, 60, 62, 63, 64, 64, 63, 63, 62, 62, 61, 61, 61, 60,
+        60, 60, 60, 60,
+        /* Size 32 */
+        73, 74, 75, 76, 77, 75, 72, 70, 67, 67, 67, 67, 66, 66, 66, 66, 66, 65,
+        65, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 74, 74, 75, 75,
+        76, 73, 71, 69, 67, 67, 67, 67, 67, 67, 67, 66, 66, 66, 66, 65, 65, 65,
+        65, 64, 64, 64, 63, 63, 63, 63, 63, 63, 75, 75, 74, 74, 74, 72, 70, 69,
+        67, 67, 67, 67, 67, 67, 67, 67, 67, 66, 66, 66, 66, 65, 65, 65, 64, 64,
+        64, 64, 63, 63, 63, 63, 76, 75, 74, 73, 72, 71, 70, 68, 67, 67, 67, 68,
+        68, 68, 68, 67, 67, 67, 67, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64,
+        64, 64, 77, 76, 74, 72, 70, 70, 69, 68, 67, 67, 68, 68, 68, 68, 68, 68,
+        68, 68, 67, 67, 67, 66, 66, 66, 65, 65, 65, 64, 64, 64, 64, 64, 75, 73,
+        72, 71, 70, 69, 68, 67, 67, 67, 67, 67, 68, 67, 67, 67, 67, 67, 67, 67,
+        66, 66, 66, 65, 65, 65, 64, 64, 64, 64, 64, 64, 72, 71, 70, 70, 69, 68,
+        67, 67, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 66, 66, 66, 66, 65, 65,
+        65, 65, 64, 64, 64, 64, 64, 64, 70, 69, 69, 68, 68, 67, 67, 66, 66, 66,
+        66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64,
+        64, 64, 64, 64, 67, 67, 67, 67, 67, 67, 66, 66, 65, 65, 65, 65, 66, 66,
+        66, 66, 66, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+        67, 67, 67, 67, 67, 67, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+        65, 65, 65, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 67, 67, 67, 67,
+        68, 67, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64,
+        64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 67, 67, 67, 68, 68, 67, 67, 66,
+        65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 66, 67, 67, 68, 68, 68, 67, 66, 66, 65, 65, 65,
+        64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62,
+        62, 62, 66, 67, 67, 68, 68, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64, 64,
+        64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 66, 67,
+        67, 68, 68, 67, 67, 66, 66, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+        63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 66, 66, 67, 67, 68, 67,
+        67, 66, 66, 65, 65, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 62, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 66, 66, 67, 67, 68, 67, 67, 66, 66, 65,
+        65, 64, 64, 64, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 62, 65, 66, 66, 67, 68, 67, 67, 66, 65, 65, 65, 64, 64, 64,
+        63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61,
+        65, 66, 66, 67, 67, 67, 66, 66, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 65, 65, 66, 66,
+        67, 67, 66, 66, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62,
+        62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 65, 65, 66, 66, 67, 66, 66, 65,
+        65, 65, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61,
+        61, 61, 61, 61, 61, 61, 64, 65, 65, 66, 66, 66, 66, 65, 65, 64, 64, 64,
+        63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+        61, 61, 64, 65, 65, 65, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 62,
+        62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 64, 64,
+        65, 65, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62,
+        61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 63, 64, 64, 65, 65, 65,
+        65, 64, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61,
+        61, 61, 61, 60, 60, 60, 60, 60, 63, 64, 64, 65, 65, 65, 65, 64, 64, 64,
+        63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60,
+        60, 60, 60, 60, 63, 63, 64, 64, 65, 64, 64, 64, 64, 64, 63, 63, 63, 62,
+        62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60,
+        63, 63, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 61,
+        61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 62, 63, 63, 64,
+        64, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61,
+        61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 62, 63, 63, 64, 64, 64, 64, 64,
+        63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60,
+        60, 60, 60, 60, 60, 60, 62, 63, 63, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+        62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+        60, 60, 62, 63, 63, 64, 64, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62,
+        62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 63, 60, 59, 63, 61, 59, 59, 60, 59, 58, 58, 59, 59, 58, 58,
+        /* Size 8 */
+        64, 66, 66, 64, 62, 61, 60, 60, 66, 65, 65, 64, 63, 61, 61, 60, 66, 65,
+        63, 62, 61, 61, 60, 60, 64, 64, 62, 61, 61, 60, 60, 59, 62, 63, 61, 61,
+        60, 60, 59, 59, 61, 61, 61, 60, 60, 59, 59, 59, 60, 61, 60, 60, 59, 59,
+        59, 59, 60, 60, 60, 59, 59, 59, 59, 59,
+        /* Size 16 */
+        64, 65, 66, 66, 66, 65, 64, 63, 62, 61, 61, 60, 60, 60, 60, 60, 65, 65,
+        66, 66, 66, 65, 64, 63, 62, 62, 61, 61, 60, 60, 60, 60, 66, 66, 65, 65,
+        65, 65, 64, 63, 63, 62, 61, 61, 61, 60, 60, 60, 66, 66, 65, 65, 64, 64,
+        63, 63, 62, 62, 61, 61, 60, 60, 60, 60, 66, 66, 65, 64, 63, 63, 62, 62,
+        61, 61, 61, 60, 60, 60, 60, 60, 65, 65, 65, 64, 63, 62, 62, 61, 61, 61,
+        60, 60, 60, 60, 60, 60, 64, 64, 64, 63, 62, 62, 61, 61, 61, 60, 60, 60,
+        60, 60, 59, 59, 63, 63, 63, 63, 62, 61, 61, 61, 60, 60, 60, 60, 60, 59,
+        59, 59, 62, 62, 63, 62, 61, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+        61, 62, 62, 62, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 61, 61,
+        61, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 60, 61, 61, 61,
+        60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 60, 60, 61, 60, 60, 60,
+        60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59,
+        /* Size 32 */
+        64, 65, 65, 66, 66, 66, 66, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 62,
+        61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 65, 65, 65, 66,
+        66, 66, 66, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 62, 62, 61, 61, 61,
+        61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 65, 65, 65, 66, 66, 66, 66, 66,
+        66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61, 61, 61, 60, 60,
+        60, 60, 60, 60, 60, 60, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 64,
+        64, 64, 63, 63, 62, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+        60, 60, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64, 64, 63, 63,
+        63, 62, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 66, 66,
+        66, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62,
+        61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 66, 66, 66, 65, 65, 65,
+        65, 65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 61, 61, 61, 61, 61,
+        60, 60, 60, 60, 60, 60, 60, 60, 66, 66, 66, 65, 65, 65, 65, 64, 64, 63,
+        63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+        60, 60, 60, 60, 66, 66, 66, 65, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62,
+        62, 62, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+        65, 65, 65, 65, 65, 65, 64, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61,
+        61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 65, 65, 65, 65,
+        65, 64, 64, 63, 63, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60,
+        60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 64, 64, 64, 64, 65, 64, 63, 63,
+        62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 64, 64, 64, 64, 64, 64, 63, 63, 62, 62, 62, 61,
+        61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59,
+        59, 59, 63, 63, 63, 64, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 61, 61,
+        60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 63, 63,
+        63, 63, 63, 63, 63, 62, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 62, 62, 63, 63, 63, 63,
+        62, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+        60, 59, 59, 59, 59, 59, 59, 59, 62, 62, 62, 62, 63, 62, 62, 62, 61, 61,
+        61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+        59, 59, 59, 59, 62, 62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 60, 60,
+        60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        61, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+        60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 61, 61, 61, 62,
+        62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 61, 61, 61, 61, 61, 61, 61, 61,
+        61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 61, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 60, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60,
+        60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60,
+        61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 61, 60,
+        60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+        60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60,
+        60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59 },
+      { /* Intra matrices */
+        /* Size 4 */
+        69, 68, 65, 63, 68, 65, 64, 63, 65, 64, 62, 62, 63, 63, 62, 62,
+        /* Size 8 */
+        67, 70, 69, 67, 65, 64, 63, 62, 70, 69, 69, 68, 66, 64, 63, 63, 69, 69,
+        66, 65, 64, 64, 63, 63, 67, 68, 65, 64, 63, 63, 63, 62, 65, 66, 64, 63,
+        63, 62, 62, 62, 64, 64, 64, 63, 62, 62, 62, 62, 63, 63, 63, 63, 62, 62,
+        62, 62, 62, 63, 63, 62, 62, 62, 62, 62,
+        /* Size 16 */
+        68, 69, 70, 70, 69, 68, 67, 66, 65, 65, 64, 64, 63, 63, 63, 63, 69, 69,
+        69, 69, 69, 68, 67, 66, 66, 65, 64, 64, 63, 63, 63, 63, 70, 69, 69, 69,
+        69, 68, 68, 67, 66, 65, 65, 64, 64, 63, 63, 63, 70, 69, 69, 68, 68, 67,
+        67, 66, 65, 65, 64, 64, 63, 63, 63, 63, 69, 69, 69, 68, 66, 66, 65, 65,
+        65, 64, 64, 64, 63, 63, 63, 63, 68, 68, 68, 67, 66, 65, 65, 65, 64, 64,
+        64, 63, 63, 63, 63, 63, 67, 67, 68, 67, 65, 65, 64, 64, 64, 63, 63, 63,
+        63, 63, 62, 62, 66, 66, 67, 66, 65, 65, 64, 64, 63, 63, 63, 63, 63, 62,
+        62, 62, 65, 66, 66, 65, 65, 64, 64, 63, 63, 63, 63, 63, 62, 62, 62, 62,
+        65, 65, 65, 65, 64, 64, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 64, 64,
+        65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 64, 64, 64, 64,
+        64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 63, 63, 64, 63, 63, 63,
+        63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62,
+        62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 62,
+        /* Size 32 */
+        68, 68, 69, 70, 70, 70, 70, 70, 69, 69, 68, 68, 67, 67, 66, 66, 65, 65,
+        65, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 68, 69, 69, 69,
+        70, 70, 70, 69, 69, 69, 68, 68, 67, 67, 66, 66, 65, 65, 65, 65, 64, 64,
+        64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 69, 69, 69, 69, 70, 69, 69, 69,
+        69, 69, 68, 68, 67, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64, 64, 63, 63,
+        63, 63, 63, 63, 63, 63, 70, 69, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68,
+        68, 67, 67, 66, 66, 66, 65, 65, 65, 64, 64, 64, 64, 63, 63, 63, 63, 63,
+        63, 63, 70, 70, 70, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 67, 67, 67,
+        66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 70, 70,
+        69, 69, 69, 69, 69, 69, 69, 68, 68, 68, 67, 67, 67, 66, 66, 65, 65, 65,
+        65, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 70, 70, 69, 69, 69, 69,
+        68, 68, 68, 68, 67, 67, 67, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64,
+        64, 63, 63, 63, 63, 63, 63, 63, 70, 69, 69, 69, 69, 69, 68, 68, 67, 67,
+        67, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+        63, 63, 63, 63, 69, 69, 69, 69, 69, 69, 68, 67, 67, 66, 66, 66, 66, 65,
+        65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63,
+        69, 69, 69, 69, 69, 68, 68, 67, 66, 66, 66, 66, 65, 65, 65, 65, 65, 64,
+        64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 68, 68, 68, 68,
+        69, 68, 67, 67, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64,
+        63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 68, 68, 68, 68, 68, 68, 67, 66,
+        66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 67, 67, 67, 68, 68, 67, 67, 66, 66, 65, 65, 65,
+        64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 67, 67, 67, 67, 67, 67, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64,
+        64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 66, 66,
+        67, 67, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 66, 66, 66, 66, 67, 66,
+        66, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 62, 62, 62, 62, 62, 62, 65, 65, 66, 66, 66, 66, 65, 65, 65, 65,
+        64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62,
+        62, 62, 62, 62, 65, 65, 65, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 63,
+        63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62,
+        65, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 64, 65, 65, 65,
+        65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 64, 64, 64, 65, 65, 65, 64, 64,
+        64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 62, 62, 62, 64, 64, 64, 64, 65, 64, 64, 64, 64, 64, 64, 63,
+        63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 64,
+        64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 64, 64, 64,
+        64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+        63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 61, 60, 59, 61, 60, 60, 59, 60, 60, 59, 58, 59, 59, 58, 58,
+        /* Size 8 */
+        64, 66, 61, 61, 61, 60, 60, 59, 66, 63, 61, 62, 62, 61, 61, 60, 61, 61,
+        61, 61, 61, 61, 60, 60, 61, 62, 61, 60, 60, 60, 60, 59, 61, 62, 61, 60,
+        60, 59, 59, 59, 60, 61, 61, 60, 59, 59, 59, 59, 60, 61, 60, 60, 59, 59,
+        59, 59, 59, 60, 60, 59, 59, 59, 59, 58,
+        /* Size 16 */
+        64, 65, 66, 64, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 59, 59, 65, 65,
+        64, 63, 61, 61, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 66, 64, 63, 62,
+        61, 62, 62, 62, 62, 62, 61, 61, 61, 60, 60, 60, 64, 63, 62, 62, 61, 61,
+        61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61,
+        61, 61, 61, 60, 60, 60, 60, 60, 61, 61, 62, 61, 61, 61, 60, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 61, 62, 62, 61, 61, 60, 60, 60, 60, 60, 60, 60,
+        60, 60, 59, 59, 61, 61, 62, 61, 61, 60, 60, 60, 60, 60, 60, 60, 59, 59,
+        59, 59, 61, 61, 62, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59,
+        61, 61, 62, 61, 61, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 60, 61,
+        61, 61, 61, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 60, 61, 61, 61,
+        60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 61, 60, 60, 60,
+        60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 59,
+        59, 59, 59, 59, 59, 59, 58, 58, 59, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+        59, 59, 59, 58, 58, 58, 59, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+        59, 58, 58, 58,
+        /* Size 32 */
+        64, 64, 65, 65, 66, 65, 64, 63, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+        61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 64, 65, 65, 65,
+        65, 64, 63, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 60,
+        60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 65, 65, 65, 65, 64, 64, 63, 62,
+        61, 61, 61, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 65, 65, 65, 64, 64, 63, 63, 62, 61, 61, 62, 62,
+        62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+        60, 60, 66, 65, 64, 64, 63, 63, 62, 62, 61, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 65, 64,
+        64, 63, 63, 62, 62, 62, 61, 61, 61, 62, 62, 62, 62, 62, 62, 61, 61, 61,
+        61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 64, 63, 63, 63, 62, 62,
+        62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+        60, 60, 60, 60, 60, 60, 60, 60, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61,
+        61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+        60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+        61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+        61, 61, 61, 61, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+        60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 62,
+        62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 62, 62, 62, 62, 61, 61,
+        61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+        60, 60, 59, 59, 59, 59, 61, 61, 62, 62, 62, 62, 61, 61, 61, 61, 60, 60,
+        60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59,
+        59, 59, 61, 61, 61, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 61, 61,
+        61, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+        60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 61, 61, 61, 62, 62, 62,
+        61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 61, 61, 61, 62, 62, 62, 61, 61, 61, 61,
+        60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 61, 61, 61, 61, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60,
+        60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        61, 61, 61, 61, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 61, 61, 61,
+        61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 61, 61, 61, 61, 61, 61, 61,
+        61, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 60, 60, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60,
+        60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 60, 60, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60,
+        60, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 61, 61,
+        60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 61, 60, 60, 60, 60, 60,
+        60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        58, 58, 58, 58, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58,
+        60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 59, 60, 60, 60,
+        60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+        59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 59, 60, 60, 60, 60, 60, 60, 60,
+        60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 58,
+        58, 58, 58, 58, 58, 58, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58,
+        58, 58, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59,
+        59, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58 },
+      { /* Intra matrices */
+        /* Size 4 */
+        69, 65, 65, 64, 65, 64, 64, 63, 65, 64, 63, 62, 64, 63, 62, 62,
+        /* Size 8 */
+        68, 70, 65, 65, 65, 64, 63, 63, 70, 67, 65, 66, 66, 65, 64, 64, 65, 65,
+        64, 64, 65, 64, 64, 63, 65, 66, 64, 64, 64, 63, 63, 63, 65, 66, 65, 64,
+        63, 63, 63, 63, 64, 65, 64, 63, 63, 63, 62, 62, 63, 64, 64, 63, 63, 62,
+        62, 62, 63, 64, 63, 63, 63, 62, 62, 62,
+        /* Size 16 */
+        68, 69, 70, 68, 65, 65, 65, 65, 65, 64, 64, 64, 64, 63, 63, 63, 69, 69,
+        69, 67, 65, 65, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 70, 69, 67, 66,
+        65, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 68, 67, 66, 66, 65, 65,
+        65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 65, 65, 65, 65, 64, 65, 65, 65,
+        65, 65, 64, 64, 64, 64, 64, 64, 65, 65, 66, 65, 65, 64, 64, 64, 64, 64,
+        64, 64, 64, 63, 63, 63, 65, 66, 66, 65, 65, 64, 64, 64, 64, 64, 64, 63,
+        63, 63, 63, 63, 65, 65, 66, 65, 65, 64, 64, 64, 64, 63, 63, 63, 63, 63,
+        63, 63, 65, 65, 66, 65, 65, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63,
+        64, 65, 65, 65, 65, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 65,
+        65, 65, 64, 64, 64, 63, 63, 63, 63, 63, 63, 62, 62, 62, 64, 64, 65, 65,
+        64, 64, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 64, 64, 65, 64, 64, 64,
+        63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 63, 64, 64, 64, 64, 63, 63, 63,
+        63, 63, 62, 62, 62, 62, 62, 62, 63, 64, 64, 64, 64, 63, 63, 63, 63, 63,
+        62, 62, 62, 62, 62, 62, 63, 64, 64, 64, 64, 63, 63, 63, 63, 63, 62, 62,
+        62, 62, 62, 62,
+        /* Size 32 */
+        68, 69, 69, 70, 71, 69, 68, 67, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+        65, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 69, 69, 69, 69,
+        70, 69, 68, 67, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64, 64,
+        64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 69, 69, 69, 69, 69, 68, 67, 66,
+        65, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 70, 69, 69, 68, 68, 67, 67, 66, 65, 66, 66, 66,
+        66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64,
+        64, 64, 71, 70, 69, 68, 67, 67, 66, 66, 65, 66, 66, 66, 66, 66, 66, 66,
+        66, 66, 66, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 69, 69,
+        68, 67, 67, 66, 66, 66, 65, 65, 65, 66, 66, 66, 66, 66, 66, 65, 65, 65,
+        65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 68, 68, 67, 67, 66, 66,
+        66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 67, 67, 66, 66, 66, 66, 65, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+        65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        65, 65, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 66, 66,
+        66, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 65, 65, 66, 66, 66, 66, 65, 65,
+        65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        63, 63, 63, 63, 63, 63, 65, 65, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63,
+        63, 63, 65, 65, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 65, 65,
+        65, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63,
+        63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 65, 65, 65, 66, 66, 66,
+        65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 63, 63, 65, 65, 65, 66, 66, 66, 65, 65, 65, 65,
+        64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 63, 65, 65, 65, 65, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64,
+        64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+        65, 65, 65, 65, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 65, 65, 65,
+        65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 64, 64, 65, 65, 65, 65, 65, 65,
+        64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 62, 62, 62, 62, 62, 64, 64, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64,
+        64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62,
+        62, 62, 64, 64, 64, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 64, 64,
+        64, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 64, 64, 64, 64, 65, 64,
+        64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+        63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 64, 64,
+        64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62,
+        62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 64, 64, 64, 64, 64, 64,
+        64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 62, 62, 62, 62, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63,
+        63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+        62, 62, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62 } } },
+  { {   /* Luma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 8 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 16 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64,
+        /* Size 32 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+      { /* Intra matrices */
+        /* Size 4 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 8 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 16 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64,
+        /* Size 32 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 } },
+    {   /* Chroma matrices */
+      { /* Inter matrices */
+        /* Size 4 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 8 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 16 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64,
+        /* Size 32 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+      { /* Intra matrices */
+        /* Size 4 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 8 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        /* Size 16 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64,
+        /* Size 32 */
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 } } }
+};
+
+#endif
diff --git a/av1/common/quant_common.h b/av1/common/quant_common.h
new file mode 100644
index 0000000..6ceed49
--- /dev/null
+++ b/av1/common/quant_common.h
@@ -0,0 +1,81 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_QUANT_COMMON_H_
+#define VP10_COMMON_QUANT_COMMON_H_
+
+#include "aom/vpx_codec.h"
+#include "av1/common/seg_common.h"
+#include "av1/common/enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MINQ 0
+#define MAXQ 255
+#define QINDEX_RANGE (MAXQ - MINQ + 1)
+#define QINDEX_BITS 8
+#if CONFIG_AOM_QM
+// Total number of QM sets stored
+#define QM_LEVEL_BITS 4
+#define NUM_QM_LEVELS (1 << QM_LEVEL_BITS)
+/* Offset into the list of QMs. The actual number of levels used is
+   (NUM_QM_LEVELS - DEFAULT_QM_FIRST); a lower value of DEFAULT_QM_FIRST
+   implies more heavily weighted matrices. */
+#define DEFAULT_QM_FIRST (NUM_QM_LEVELS / 2)
+#define DEFAULT_QM_LAST (NUM_QM_LEVELS - 1)
+#endif
+
+struct VP10Common;
+
+int16_t vp10_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
+int16_t vp10_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
+
+int vp10_get_qindex(const struct segmentation *seg, int segment_id,
+                    int base_qindex);
+#if CONFIG_AOM_QM
+// Reduce the large number of quantizers to a smaller number of levels for which
+// different matrices may be defined
+static inline int aom_get_qmlevel(int qindex, int first, int last) {
+  int qmlevel = (qindex * (last + 1 - first) + QINDEX_RANGE / 2) / QINDEX_RANGE;
+  qmlevel = VPXMIN(qmlevel + first, NUM_QM_LEVELS - 1);
+  return qmlevel;
+}
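+// Worked example (illustrative, using the defaults above: QM_LEVEL_BITS == 4
+// gives NUM_QM_LEVELS == 16, DEFAULT_QM_FIRST == 8, DEFAULT_QM_LAST == 15):
+//   aom_get_qmlevel(0, 8, 15)   == 0 + 8                      == 8
+//   aom_get_qmlevel(128, 8, 15) == (128 * 8 + 128) / 256 + 8  == 12
+//   aom_get_qmlevel(255, 8, 15) == VPXMIN(16, 15)             == 15
+// i.e. the qindex range [0, 255] maps linearly onto [first, last].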
+void aom_qm_init(struct VP10Common *cm);
+qm_val_t *aom_iqmatrix(struct VP10Common *cm, int qindex, int comp,
+                       int log2sizem2, int is_intra);
+qm_val_t *aom_qmatrix(struct VP10Common *cm, int qindex, int comp,
+                      int log2sizem2, int is_intra);
+#endif
+
+#if CONFIG_NEW_QUANT
+
+#define QUANT_PROFILES 3
+#define QUANT_RANGES 2
+#define NUQ_KNOTS 3
+
+typedef tran_low_t dequant_val_type_nuq[NUQ_KNOTS + 1];
+typedef tran_low_t cuml_bins_type_nuq[NUQ_KNOTS];
+void vp10_get_dequant_val_nuq(int q, int qindex, int band, tran_low_t *dq,
+                              tran_low_t *cuml_bins, int dq_off_index);
+tran_low_t vp10_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq);
+tran_low_t vp10_dequant_coeff_nuq(int v, int q, const tran_low_t *dq);
+
+static INLINE int get_dq_profile_from_ctx(int q_ctx) {
+  return VPXMIN(q_ctx, QUANT_PROFILES - 1);
+}
+#endif  // CONFIG_NEW_QUANT
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_QUANT_COMMON_H_
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
new file mode 100644
index 0000000..0c3b93a
--- /dev/null
+++ b/av1/common/reconinter.c
@@ -0,0 +1,2044 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <math.h>    // sqrt(), rint() in init_wedge_master_masks()
+#include <string.h>  // memset() in init_wedge_signs()/init_wedge_masks()
+
+#include "./vpx_scale_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_config.h"
+
+#include "aom/vpx_integer.h"
+#include "aom_dsp/blend.h"
+
+#include "av1/common/blockd.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/reconintra.h"
+#if CONFIG_OBMC
+#include "av1/common/onyxc_int.h"
+#endif  // CONFIG_OBMC
+#if CONFIG_GLOBAL_MOTION
+#include "av1/common/warped_motion.h"
+#endif  // CONFIG_GLOBAL_MOTION
+
+#if CONFIG_EXT_INTER
+
+#define NSMOOTHERS 1
+static int get_masked_weight(int m, int smoothness) {
+#define SMOOTHER_LEN 32
+  static const uint8_t smoothfn[NSMOOTHERS][2 * SMOOTHER_LEN + 1] = { {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 1, 2, 4, 7, 13, 21, 32, 43, 51, 57, 60, 62, 63, 64, 64, 64, 64, 64, 64,
+      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+      64, 64,
+  } };
+  if (m < -SMOOTHER_LEN)
+    return 0;
+  else if (m > SMOOTHER_LEN)
+    return (1 << WEDGE_WEIGHT_BITS);
+  else
+    return smoothfn[smoothness][m + SMOOTHER_LEN];
+}
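+// Illustrative reading of the table above (assuming WEDGE_WEIGHT_BITS == 6,
+// so full weight is 64): large negative m yields weight 0, m == 0 lands on
+// smoothfn[0][32] == 32 (an even 50/50 blend), and large positive m
+// saturates at 64, so the signed distance m from the wedge boundary becomes
+// a smooth 0..64 blending ramp.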
+
+// [smoother][negative][direction]
+DECLARE_ALIGNED(
+    16, static uint8_t,
+    wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE *
+                                                    MASK_MASTER_SIZE]);
+
+DECLARE_ALIGNED(16, static uint8_t,
+                wedge_signflip_lookup[BLOCK_SIZES][MAX_WEDGE_TYPES]);
+
+// 3 * MAX_WEDGE_SQUARE is an easy-to-compute and fairly tight upper bound
+// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
+DECLARE_ALIGNED(16, static uint8_t,
+                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 3 * MAX_WEDGE_SQUARE]);
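+// Sketch of why the bound holds (illustrative arithmetic): the square sizes
+// up to MAX_WEDGE_SQUARE form a geometric series summing to less than
+// (4 / 3) * MAX_WEDGE_SQUARE, and the two half-area rectangular shapes at
+// each size contribute at most the same again, keeping the total under
+// (8 / 3) * MAX_WEDGE_SQUARE < 3 * MAX_WEDGE_SQUARE.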
+
+static wedge_masks_type wedge_masks[BLOCK_SIZES][2];
+
+// Some unused wedge codebooks left temporarily to facilitate experiments.
+// To be removed when settled.
+static wedge_code_type wedge_codebook_8_hgtw[8] = {
+  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
+  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
+};
+
+static wedge_code_type wedge_codebook_8_hltw[8] = {
+  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
+  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
+};
+
+static wedge_code_type wedge_codebook_8_heqw[8] = {
+  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
+  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
+};
+
+#if !USE_LARGE_WEDGE_CODEBOOK
+static const wedge_code_type wedge_codebook_16_hgtw[16] = {
+  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
+  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
+  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
+  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
+  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
+  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
+};
+
+static const wedge_code_type wedge_codebook_16_hltw[16] = {
+  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
+  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
+  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
+  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
+  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
+  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
+};
+
+static const wedge_code_type wedge_codebook_16_heqw[16] = {
+  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
+  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
+  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
+  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
+  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
+  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
+};
+
+const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[3], 0, wedge_masks[3] },
+  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[4], 0, wedge_masks[4] },
+  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[5], 0, wedge_masks[5] },
+  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[6], 0, wedge_masks[6] },
+  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[7], 0, wedge_masks[7] },
+  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[8], 0, wedge_masks[8] },
+  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[9], 0, wedge_masks[9] },
+  { 0, wedge_codebook_8_hgtw, wedge_signflip_lookup[10], 0, wedge_masks[10] },
+  { 0, wedge_codebook_8_hltw, wedge_signflip_lookup[11], 0, wedge_masks[11] },
+  { 0, wedge_codebook_8_heqw, wedge_signflip_lookup[12], 0, wedge_masks[12] },
+#if CONFIG_EXT_PARTITION
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+#endif  // CONFIG_EXT_PARTITION
+};
+
+#else
+
+static const wedge_code_type wedge_codebook_32_hgtw[32] = {
+  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
+  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
+  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
+  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
+  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
+  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
+  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
+  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
+  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
+  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
+  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
+  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
+  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
+  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
+};
+
+static const wedge_code_type wedge_codebook_32_hltw[32] = {
+  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
+  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
+  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
+  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
+  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
+  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
+  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
+  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
+  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
+  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
+  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
+  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
+  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
+  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
+};
+
+static const wedge_code_type wedge_codebook_32_heqw[32] = {
+  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
+  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
+  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
+  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
+  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
+  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
+  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
+  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
+  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
+  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
+  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
+  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
+  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
+  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
+};
+
+const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+  { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[3], 0, wedge_masks[3] },
+  { 5, wedge_codebook_32_hgtw, wedge_signflip_lookup[4], 0, wedge_masks[4] },
+  { 5, wedge_codebook_32_hltw, wedge_signflip_lookup[5], 0, wedge_masks[5] },
+  { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[6], 0, wedge_masks[6] },
+  { 5, wedge_codebook_32_hgtw, wedge_signflip_lookup[7], 0, wedge_masks[7] },
+  { 5, wedge_codebook_32_hltw, wedge_signflip_lookup[8], 0, wedge_masks[8] },
+  { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[9], 0, wedge_masks[9] },
+  { 0, wedge_codebook_8_hgtw, wedge_signflip_lookup[10], 0, wedge_masks[10] },
+  { 0, wedge_codebook_8_hltw, wedge_signflip_lookup[11], 0, wedge_masks[11] },
+  { 0, wedge_codebook_8_heqw, wedge_signflip_lookup[12], 0, wedge_masks[12] },
+#if CONFIG_EXT_PARTITION
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+#endif  // CONFIG_EXT_PARTITION
+};
+#endif  // USE_LARGE_WEDGE_CODEBOOK
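+
+// In the wedge_params_lookup tables above, the first field of each
+// wedge_params_type entry is the number of bits used to code the wedge
+// index, so 4 bits exposes 16 wedge types and 5 bits exposes 32, while 0
+// disables wedge prediction for that block size (the code below derives
+// the count as wtypes = 1 << wbits).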
+
+static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
+                                             BLOCK_SIZE sb_type) {
+  const uint8_t *master;
+  const int bh = 4 << b_height_log2_lookup[sb_type];
+  const int bw = 4 << b_width_log2_lookup[sb_type];
+  const wedge_code_type *a =
+      wedge_params_lookup[sb_type].codebook + wedge_index;
+  const int smoother = wedge_params_lookup[sb_type].smoother;
+  int woff, hoff;
+  const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];
+
+  assert(wedge_index >= 0 &&
+         wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
+  woff = (a->x_offset * bw) >> 3;
+  hoff = (a->y_offset * bh) >> 3;
+  master = wedge_mask_obl[smoother][neg ^ wsignflip][a->direction] +
+           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
+           MASK_MASTER_SIZE / 2 - woff;
+  return master;
+}
+
+const uint8_t *vp10_get_soft_mask(int wedge_index, int wedge_sign,
+                                  BLOCK_SIZE sb_type, int offset_x,
+                                  int offset_y) {
+  const uint8_t *mask =
+      get_wedge_mask_inplace(wedge_index, wedge_sign, sb_type);
+  if (mask) mask -= (offset_x + offset_y * MASK_MASTER_STRIDE);
+  return mask;
+}
+
+static void init_wedge_master_masks() {
+  int i, j, s;
+  const int w = MASK_MASTER_SIZE;
+  const int h = MASK_MASTER_SIZE;
+  const int stride = MASK_MASTER_STRIDE;
+  const int a[2] = { 2, 1 };
+  const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
+  for (s = 0; s < NSMOOTHERS; s++) {
+    for (i = 0; i < h; ++i)
+      for (j = 0; j < w; ++j) {
+        int x = (2 * j + 1 - w);
+        int y = (2 * i + 1 - h);
+        int m = (int)rint((a[0] * x + a[1] * y) / asqrt);
+        wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] =
+            wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] =
+                get_masked_weight(m, s);
+        wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
+            wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
+                (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s);
+        wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] =
+            wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] =
+                (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s);
+        wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
+            wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
+                get_masked_weight(m, s);
+        wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] =
+            wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] =
+                get_masked_weight(x, s);
+        wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] =
+            wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] =
+                (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x, s);
+      }
+  }
+}
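+
+// Geometry note (an illustrative reading of the loop above): x and y are
+// offsets from the mask center in half-pel units, so m approximates the
+// signed distance from the line a[0] * x + a[1] * y == 0. With a[] equal to
+// { 2, 1 } that line is inclined at atan(2), roughly 63 degrees, which gives
+// WEDGE_OBLIQUE63; transposing the indices yields the ~27 degree wedge, and
+// mirroring yields the 117 and 153 degree variants.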
+
+// If the signs of the wedge masks for the various block sizes are
+// inconsistent, flip the sign flag. Do this only once for each
+// wedge codebook.
+static void init_wedge_signs() {
+  BLOCK_SIZE sb_type;
+  memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
+  for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES; ++sb_type) {
+    const int bw = 4 * num_4x4_blocks_wide_lookup[sb_type];
+    const int bh = 4 * num_4x4_blocks_high_lookup[sb_type];
+    const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
+    const int wbits = wedge_params.bits;
+    const int wtypes = 1 << wbits;
+    int i, w;
+    if (wbits == 0) continue;
+    for (w = 0; w < wtypes; ++w) {
+      const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
+      int sum = 0;
+      for (i = 0; i < bw; ++i) sum += mask[i];
+      for (i = 0; i < bh; ++i) sum += mask[i * MASK_MASTER_STRIDE];
+      sum = (sum + (bw + bh) / 2) / (bw + bh);
+      wedge_params.signflip[w] = (sum < 32);
+    }
+  }
+}
+
+static void init_wedge_masks() {
+  uint8_t *dst = wedge_mask_buf;
+  BLOCK_SIZE bsize;
+  memset(wedge_masks, 0, sizeof(wedge_masks));
+  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES; ++bsize) {
+    const uint8_t *mask;
+    const int bw = 4 * num_4x4_blocks_wide_lookup[bsize];
+    const int bh = 4 * num_4x4_blocks_high_lookup[bsize];
+    const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
+    const int wbits = wedge_params->bits;
+    const int wtypes = 1 << wbits;
+    int w;
+    if (wbits == 0) continue;
+    for (w = 0; w < wtypes; ++w) {
+      mask = get_wedge_mask_inplace(w, 0, bsize);
+      vpx_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
+                        bh);
+      wedge_params->masks[0][w] = dst;
+      dst += bw * bh;
+
+      mask = get_wedge_mask_inplace(w, 1, bsize);
+      vpx_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
+                        bh);
+      wedge_params->masks[1][w] = dst;
+      dst += bw * bh;
+    }
+    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
+  }
+}
+
+// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
+void vp10_init_wedge_masks() {
+  init_wedge_master_masks();
+  init_wedge_signs();
+  init_wedge_masks();
+}
+
+#if CONFIG_SUPERTX
+static void build_masked_compound_wedge_extend(
+    uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
+    const uint8_t *src1, int src1_stride, int wedge_index, int wedge_sign,
+    BLOCK_SIZE sb_type, int wedge_offset_x, int wedge_offset_y, int h, int w) {
+  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
+  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+  const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign, sb_type,
+                                           wedge_offset_x, wedge_offset_y);
+  vpx_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
+                     mask, MASK_MASTER_STRIDE, h, w, subh, subw);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void build_masked_compound_wedge_extend_highbd(
+    uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
+    const uint8_t *src1_8, int src1_stride, int wedge_index, int wedge_sign,
+    BLOCK_SIZE sb_type, int wedge_offset_x, int wedge_offset_y, int h, int w,
+    int bd) {
+  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
+  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+  const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign, sb_type,
+                                           wedge_offset_x, wedge_offset_y);
+  vpx_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
+                            src1_stride, mask, MASK_MASTER_STRIDE, h, w, subh,
+                            subw, bd);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_SUPERTX
+
+static void build_masked_compound_wedge(uint8_t *dst, int dst_stride,
+                                        const uint8_t *src0, int src0_stride,
+                                        const uint8_t *src1, int src1_stride,
+                                        int wedge_index, int wedge_sign,
+                                        BLOCK_SIZE sb_type, int h, int w) {
+  // Derive subsampling from h and w passed in. May be refactored to
+  // pass in subsampling factors directly.
+  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
+  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+  const uint8_t *mask =
+      vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
+  vpx_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
+                     mask, 4 * num_4x4_blocks_wide_lookup[sb_type], h, w, subh,
+                     subw);
+}
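+
+// Worked example for the subsampling derivation above (illustrative,
+// assuming the usual lookup values): for BLOCK_8X8,
+// 2 << b_height_log2_lookup[BLOCK_8X8] == 4, so a luma call with h == 8
+// gives subh == 0 while a 4:2:0 chroma call with h == 4 gives subh == 1,
+// i.e. the comparison recovers whether the plane is subsampled relative to
+// the luma block size.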
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void build_masked_compound_wedge_highbd(
+    uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
+    const uint8_t *src1_8, int src1_stride, int wedge_index, int wedge_sign,
+    BLOCK_SIZE sb_type, int h, int w, int bd) {
+  // Derive subsampling from h and w passed in. May be refactored to
+  // pass in subsampling factors directly.
+  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
+  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+  const uint8_t *mask =
+      vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
+  vpx_highbd_blend_a64_mask(
+      dst_8, dst_stride, src0_8, src0_stride, src1_8, src1_stride, mask,
+      4 * num_4x4_blocks_wide_lookup[sb_type], h, w, subh, subw, bd);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
+                                      uint8_t *dst, int dst_stride,
+                                      const int subpel_x, const int subpel_y,
+                                      const struct scale_factors *sf, int w,
+                                      int h,
+#if CONFIG_DUAL_FILTER
+                                      const INTERP_FILTER *interp_filter,
+#else
+                                      const INTERP_FILTER interp_filter,
+#endif
+                                      int xs, int ys,
+#if CONFIG_SUPERTX
+                                      int wedge_offset_x, int wedge_offset_y,
+#endif  // CONFIG_SUPERTX
+                                      const MACROBLOCKD *xd) {
+  const MODE_INFO *mi = xd->mi[0];
+// The prediction filter types used here should be those for
+// the second reference block.
+#if CONFIG_DUAL_FILTER
+  INTERP_FILTER tmp_ipf[4] = {
+    interp_filter[2], interp_filter[3], interp_filter[2], interp_filter[3],
+  };
+#else
+  INTERP_FILTER tmp_ipf = interp_filter;
+#endif  // CONFIG_DUAL_FILTER
+#if CONFIG_VP9_HIGHBITDEPTH
+  DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
+  uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+                         ? CONVERT_TO_BYTEPTR(tmp_dst_)
+                         : tmp_dst_;
+  vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
+                            subpel_y, sf, w, h, 0, tmp_ipf, xs, ys, xd);
+#if CONFIG_SUPERTX
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+    build_masked_compound_wedge_extend_highbd(
+        dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+        mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign,
+        mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd);
+  else
+    build_masked_compound_wedge_extend(
+        dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+        mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign,
+        mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
+#else
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+    build_masked_compound_wedge_highbd(
+        dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+        mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign,
+        mi->mbmi.sb_type, h, w, xd->bd);
+  else
+    build_masked_compound_wedge(dst, dst_stride, dst, dst_stride, tmp_dst,
+                                MAX_SB_SIZE, mi->mbmi.interinter_wedge_index,
+                                mi->mbmi.interinter_wedge_sign,
+                                mi->mbmi.sb_type, h, w);
+#endif  // CONFIG_SUPERTX
+#else   // CONFIG_VP9_HIGHBITDEPTH
+  DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
+  vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
+                            subpel_y, sf, w, h, 0, tmp_ipf, xs, ys, xd);
+#if CONFIG_SUPERTX
+  build_masked_compound_wedge_extend(
+      dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+      mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign,
+      mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
+#else
+  build_masked_compound_wedge(dst, dst_stride, dst, dst_stride, tmp_dst,
+                              MAX_SB_SIZE, mi->mbmi.interinter_wedge_index,
+                              mi->mbmi.interinter_wedge_sign, mi->mbmi.sb_type,
+                              h, w);
+#endif  // CONFIG_SUPERTX
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}
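+
+// Blending sketch (assuming the usual a64 blend semantics, where 64 is full
+// weight): each output pixel is approximately
+//   dst = (mask * src0 + (64 - mask) * src1 + 32) >> 6,
+// so the prediction already written to dst is mixed with the second
+// prediction in tmp_dst under the wedge mask.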
+#endif  // CONFIG_EXT_INTER
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_build_inter_predictor(
+    const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
+    const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
+#if CONFIG_DUAL_FILTER
+    const INTERP_FILTER *interp_filter,
+#else
+    const INTERP_FILTER interp_filter,
+#endif
+    enum mv_precision precision, int x, int y, int bd) {
+  const int is_q4 = precision == MV_PRECISION_Q4;
+  const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
+                     is_q4 ? src_mv->col : src_mv->col * 2 };
+  MV32 mv = vp10_scale_mv(&mv_q4, x, y, sf);
+  const int subpel_x = mv.col & SUBPEL_MASK;
+  const int subpel_y = mv.row & SUBPEL_MASK;
+
+  src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
+
+  highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
+                         sf, w, h, ref, interp_filter, sf->x_step_q4,
+                         sf->y_step_q4, bd);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_build_inter_predictor(const uint8_t *src, int src_stride,
+                                uint8_t *dst, int dst_stride, const MV *src_mv,
+                                const struct scale_factors *sf, int w, int h,
+                                int ref,
+#if CONFIG_DUAL_FILTER
+                                const INTERP_FILTER *interp_filter,
+#else
+                                const INTERP_FILTER interp_filter,
+#endif
+                                enum mv_precision precision, int x, int y) {
+  const int is_q4 = precision == MV_PRECISION_Q4;
+  const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
+                     is_q4 ? src_mv->col : src_mv->col * 2 };
+  MV32 mv = vp10_scale_mv(&mv_q4, x, y, sf);
+  const int subpel_x = mv.col & SUBPEL_MASK;
+  const int subpel_y = mv.row & SUBPEL_MASK;
+
+  src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
+
+  inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
+                  h, ref, interp_filter, sf->x_step_q4, sf->y_step_q4);
+}
+
+void build_inter_predictors(MACROBLOCKD *xd, int plane,
+#if CONFIG_OBMC
+                            int mi_col_offset, int mi_row_offset,
+#endif  // CONFIG_OBMC
+                            int block, int bw, int bh, int x, int y, int w,
+                            int h,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                            int wedge_offset_x, int wedge_offset_y,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                            int mi_x, int mi_y) {
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+#if CONFIG_OBMC
+  const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset];
+#else
+  const MODE_INFO *mi = xd->mi[0];
+#endif  // CONFIG_OBMC
+  const int is_compound = has_second_ref(&mi->mbmi);
+  int ref;
+#if CONFIG_GLOBAL_MOTION
+  Global_Motion_Params *gm[2];
+  int is_global[2];
+  for (ref = 0; ref < 1 + is_compound; ++ref) {
+    gm[ref] = &xd->global_motion[mi->mbmi.ref_frame[ref]];
+    is_global[ref] =
+        (get_y_mode(mi, block) == ZEROMV && get_gmtype(gm[ref]) > GLOBAL_ZERO);
+  }
+  // TODO(sarahparker) remove these once gm works with all experiments
+  (void)gm;
+  (void)is_global;
+#endif  // CONFIG_GLOBAL_MOTION
+
+// TODO(sarahparker) enable the use of DUAL_FILTER in warped motion functions
+// in order to allow GLOBAL_MOTION and DUAL_FILTER to work together
+#if CONFIG_DUAL_FILTER
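+  // For sub-8x8 chroma, predict each 4x4 unit with the MV of its
+  // corresponding luma sub-block rather than with a single averaged MV as in
+  // the fallback path below.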
+  if (mi->mbmi.sb_type < BLOCK_8X8 && plane > 0) {
+    // block size in log2
+    const int b4_wl = b_width_log2_lookup[mi->mbmi.sb_type];
+    const int b4_hl = b_height_log2_lookup[mi->mbmi.sb_type];
+    const int b8_sl = b_width_log2_lookup[BLOCK_8X8];
+
+    // block size
+    const int b4_w = 1 << b4_wl;
+    const int b4_h = 1 << b4_hl;
+    const int b8_s = 1 << b8_sl;
+    int idx, idy;
+
+    const int x_base = x;
+    const int y_base = y;
+
+    // processing unit size
+    const int x_step = w >> (b8_sl - b4_wl);
+    const int y_step = h >> (b8_sl - b4_hl);
+
+    for (idy = 0; idy < b8_s; idy += b4_h) {
+      for (idx = 0; idx < b8_s; idx += b4_w) {
+        const int chr_idx = (idy * 2) + idx;
+        for (ref = 0; ref < 1 + is_compound; ++ref) {
+          const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+          struct buf_2d *const pre_buf = &pd->pre[ref];
+          struct buf_2d *const dst_buf = &pd->dst;
+          uint8_t *dst = dst_buf->buf;
+          const MV mv = mi->bmi[chr_idx].as_mv[ref].as_mv;
+          const MV mv_q4 = clamp_mv_to_umv_border_sb(
+              xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
+          uint8_t *pre;
+          MV32 scaled_mv;
+          int xs, ys, subpel_x, subpel_y;
+          const int is_scaled = vp10_is_scaled(sf);
+
+          x = x_base + idx * x_step;
+          y = y_base + idy * y_step;
+
+          dst += dst_buf->stride * y + x;
+
+          if (is_scaled) {
+            pre =
+                pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
+            scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+            xs = sf->x_step_q4;
+            ys = sf->y_step_q4;
+          } else {
+            pre = pre_buf->buf + y * pre_buf->stride + x;
+            scaled_mv.row = mv_q4.row;
+            scaled_mv.col = mv_q4.col;
+            xs = ys = 16;
+          }
+
+          subpel_x = scaled_mv.col & SUBPEL_MASK;
+          subpel_y = scaled_mv.row & SUBPEL_MASK;
+          pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
+                 (scaled_mv.col >> SUBPEL_BITS);
+
+#if CONFIG_EXT_INTER
+          if (ref && is_interinter_wedge_used(mi->mbmi.sb_type) &&
+              mi->mbmi.use_wedge_interinter)
+            vp10_make_masked_inter_predictor(
+                pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
+                sf, w, h, mi->mbmi.interp_filter, xs, ys,
+#if CONFIG_SUPERTX
+                wedge_offset_x, wedge_offset_y,
+#endif  // CONFIG_SUPERTX
+                xd);
+          else
+#endif  // CONFIG_EXT_INTER
+            vp10_make_inter_predictor(
+                pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
+                sf, x_step, y_step, ref, mi->mbmi.interp_filter, xs, ys, xd);
+        }
+      }
+    }
+    return;
+  }
+#endif
+
+  for (ref = 0; ref < 1 + is_compound; ++ref) {
+    const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+    struct buf_2d *const pre_buf = &pd->pre[ref];
+    struct buf_2d *const dst_buf = &pd->dst;
+    uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
+    const MV mv = mi->mbmi.sb_type < BLOCK_8X8
+                      ? average_split_mvs(pd, mi, ref, block)
+                      : mi->mbmi.mv[ref].as_mv;
+
+    // TODO(jkoleszar): This clamping is done in the incorrect place for the
+    // scaling case. It needs to be done on the scaled MV, not the pre-scaling
+    // MV. Note however that it performs the subsampling aware scaling so
+    // that the result is always q4.
+    // That is, the mv_precision of the result is MV_PRECISION_Q4.
+    const MV mv_q4 = clamp_mv_to_umv_border_sb(
+        xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
+
+    uint8_t *pre;
+    MV32 scaled_mv;
+    int xs, ys, subpel_x, subpel_y;
+    const int is_scaled = vp10_is_scaled(sf);
+
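+    // With a scaled reference, the buffer offset and the MV must be mapped
+    // through the scale factors; otherwise the MV is used as-is and the
+    // filter step is 16 q4 units, i.e. exactly one sample per output pixel.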
+    if (is_scaled) {
+      pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
+      scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+      xs = sf->x_step_q4;
+      ys = sf->y_step_q4;
+    } else {
+      pre = pre_buf->buf + (y * pre_buf->stride + x);
+      scaled_mv.row = mv_q4.row;
+      scaled_mv.col = mv_q4.col;
+      xs = ys = 16;
+    }
+
+    subpel_x = scaled_mv.col & SUBPEL_MASK;
+    subpel_y = scaled_mv.row & SUBPEL_MASK;
+    pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
+           (scaled_mv.col >> SUBPEL_BITS);
+
+#if CONFIG_EXT_INTER
+    if (ref && is_interinter_wedge_used(mi->mbmi.sb_type) &&
+        mi->mbmi.use_wedge_interinter)
+      vp10_make_masked_inter_predictor(pre, pre_buf->stride, dst,
+                                       dst_buf->stride, subpel_x, subpel_y, sf,
+                                       w, h, mi->mbmi.interp_filter, xs, ys,
+#if CONFIG_SUPERTX
+                                       wedge_offset_x, wedge_offset_y,
+#endif  // CONFIG_SUPERTX
+                                       xd);
+    else
+#else  // CONFIG_EXT_INTER
+#if CONFIG_GLOBAL_MOTION
+    if (is_global[ref])
+      vp10_warp_plane(&(gm[ref]->motion_params),
+#if CONFIG_VP9_HIGHBITDEPTH
+                      xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+                      pre_buf->buf0, pre_buf->width, pre_buf->height,
+                      pre_buf->stride, dst, (mi_x >> pd->subsampling_x) + x,
+                      (mi_y >> pd->subsampling_y) + y, w, h, dst_buf->stride,
+                      pd->subsampling_x, pd->subsampling_y, xs, ys);
+    else
+#endif  // CONFIG_GLOBAL_MOTION
+#endif  // CONFIG_EXT_INTER
+      vp10_make_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
+                                subpel_x, subpel_y, sf, w, h, ref,
+                                mi->mbmi.interp_filter, xs, ys, xd);
+  }
+}
+
+void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, int i,
+                                       int ir, int ic, int mi_row, int mi_col) {
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  MODE_INFO *const mi = xd->mi[0];
+  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
+  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
+
+  uint8_t *const dst = &pd->dst.buf[(ir * pd->dst.stride + ic) << 2];
+  int ref;
+  const int is_compound = has_second_ref(&mi->mbmi);
+
+  for (ref = 0; ref < 1 + is_compound; ++ref) {
+    const uint8_t *pre =
+        &pd->pre[ref].buf[(ir * pd->pre[ref].stride + ic) << 2];
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      vp10_highbd_build_inter_predictor(
+          pre, pd->pre[ref].stride, dst, pd->dst.stride,
+          &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height,
+          ref, mi->mbmi.interp_filter, MV_PRECISION_Q3,
+          mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir, xd->bd);
+    } else {
+      vp10_build_inter_predictor(
+          pre, pd->pre[ref].stride, dst, pd->dst.stride,
+          &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height,
+          ref, mi->mbmi.interp_filter, MV_PRECISION_Q3,
+          mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir);
+    }
+#else
+    vp10_build_inter_predictor(
+        pre, pd->pre[ref].stride, dst, pd->dst.stride,
+        &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height,
+        ref, mi->mbmi.interp_filter, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic,
+        mi_row * MI_SIZE + 4 * ir);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  }
+}
+
+static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,
+                                              int mi_row, int mi_col,
+                                              int plane_from, int plane_to) {
+  int plane;
+  const int mi_x = mi_col * MI_SIZE;
+  const int mi_y = mi_row * MI_SIZE;
+  for (plane = plane_from; plane <= plane_to; ++plane) {
+    const struct macroblockd_plane *pd = &xd->plane[plane];
+    const int bw = 4 * num_4x4_blocks_wide_lookup[bsize] >> pd->subsampling_x;
+    const int bh = 4 * num_4x4_blocks_high_lookup[bsize] >> pd->subsampling_y;
+
+    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
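+      // bsize is asserted to be BLOCK_8X8 below, and the BLOCK_SIZE and
+      // PARTITION_TYPE enums are ordered so that their difference recovers
+      // the sub-8x8 partition, e.g. BLOCK_8X8 - BLOCK_8X4 == PARTITION_HORZ.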
+      const PARTITION_TYPE bp = bsize - xd->mi[0]->mbmi.sb_type;
+      const int have_vsplit = bp != PARTITION_HORZ;
+      const int have_hsplit = bp != PARTITION_VERT;
+      const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+      const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+      const int pw = 8 >> (have_vsplit | pd->subsampling_x);
+      const int ph = 8 >> (have_hsplit | pd->subsampling_y);
+      int x, y;
+      assert(bp != PARTITION_NONE && bp < PARTITION_TYPES);
+      assert(bsize == BLOCK_8X8);
+      assert(pw * num_4x4_w == bw && ph * num_4x4_h == bh);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+          build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+                                 0, 0,
+#endif  // CONFIG_OBMC
+                                 y * 2 + x, bw, bh, 4 * x, 4 * y, pw, ph,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                 0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                 mi_x, mi_y);
+    } else {
+      build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+                             0, 0,
+#endif  // CONFIG_OBMC
+                             0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                             0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                             mi_x, mi_y);
+    }
+  }
+}
+
+void vp10_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                     BLOCK_SIZE bsize) {
+  build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0);
+#if CONFIG_EXT_INTER
+  if (is_interintra_pred(&xd->mi[0]->mbmi))
+    vp10_build_interintra_predictors_sby(xd, xd->plane[0].dst.buf,
+                                         xd->plane[0].dst.stride, bsize);
+#endif  // CONFIG_EXT_INTER
+}
+
+void vp10_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                     BLOCK_SIZE bsize, int plane) {
+  build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, plane, plane);
+#if CONFIG_EXT_INTER
+  if (is_interintra_pred(&xd->mi[0]->mbmi)) {
+    if (plane == 0) {
+      vp10_build_interintra_predictors_sby(xd, xd->plane[0].dst.buf,
+                                           xd->plane[0].dst.stride, bsize);
+    } else {
+      vp10_build_interintra_predictors_sbc(xd, xd->plane[plane].dst.buf,
+                                           xd->plane[plane].dst.stride, plane,
+                                           bsize);
+    }
+  }
+#endif  // CONFIG_EXT_INTER
+}
+
+void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                      BLOCK_SIZE bsize) {
+  build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1,
+                                    MAX_MB_PLANE - 1);
+#if CONFIG_EXT_INTER
+  if (is_interintra_pred(&xd->mi[0]->mbmi))
+    vp10_build_interintra_predictors_sbuv(
+        xd, xd->plane[1].dst.buf, xd->plane[2].dst.buf, xd->plane[1].dst.stride,
+        xd->plane[2].dst.stride, bsize);
+#endif  // CONFIG_EXT_INTER
+}
+
+void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                    BLOCK_SIZE bsize) {
+  build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0,
+                                    MAX_MB_PLANE - 1);
+#if CONFIG_EXT_INTER
+  if (is_interintra_pred(&xd->mi[0]->mbmi))
+    vp10_build_interintra_predictors(
+        xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
+        xd->plane[0].dst.stride, xd->plane[1].dst.stride,
+        xd->plane[2].dst.stride, bsize);
+#endif  // CONFIG_EXT_INTER
+}
+
+void vp10_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
+                           const YV12_BUFFER_CONFIG *src, int mi_row,
+                           int mi_col) {
+  uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,
+                                           src->v_buffer };
+  const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
+                                     src->uv_crop_width };
+  const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
+                                      src->uv_crop_height };
+  const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
+                                      src->uv_stride };
+  int i;
+
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    struct macroblockd_plane *const pd = &planes[i];
+    setup_pred_plane(&pd->dst, buffers[i], widths[i], heights[i], strides[i],
+                     mi_row, mi_col, NULL, pd->subsampling_x,
+                     pd->subsampling_y);
+  }
+}
+
+void vp10_setup_pre_planes(MACROBLOCKD *xd, int idx,
+                           const YV12_BUFFER_CONFIG *src, int mi_row,
+                           int mi_col, const struct scale_factors *sf) {
+  if (src != NULL) {
+    int i;
+    uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,
+                                             src->v_buffer };
+    const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
+                                       src->uv_crop_width };
+    const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
+                                        src->uv_crop_height };
+    const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
+                                        src->uv_stride };
+    for (i = 0; i < MAX_MB_PLANE; ++i) {
+      struct macroblockd_plane *const pd = &xd->plane[i];
+      setup_pred_plane(&pd->pre[idx], buffers[i], widths[i], heights[i],
+                       strides[i], mi_row, mi_col, sf, pd->subsampling_x,
+                       pd->subsampling_y);
+    }
+  }
+}
+
+#if CONFIG_SUPERTX
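+// Boundary-smoothing masks used when stitching supertx predictions. Each
+// entry is a 0..64 alpha for vpx_blend_a64_*(): 64 keeps the existing
+// prediction (src0) entirely, 0 takes the incoming extended prediction
+// (src1).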
+static const uint8_t mask_8[8] = { 64, 64, 62, 52, 12, 2, 0, 0 };
+
+static const uint8_t mask_16[16] = { 63, 62, 60, 58, 55, 50, 43, 36,
+                                     28, 21, 14, 9,  6,  4,  2,  1 };
+
+static const uint8_t mask_32[32] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63,
+                                     61, 57, 52, 45, 36, 28, 19, 12, 7,  3,  1,
+                                     0,  0,  0,  0,  0,  0,  0,  0,  0,  0 };
+
+static const uint8_t mask_8_uv[8] = { 64, 64, 62, 52, 12, 2, 0, 0 };
+
+static const uint8_t mask_16_uv[16] = { 64, 64, 64, 64, 61, 53, 45, 36,
+                                        28, 19, 11, 3,  0,  0,  0,  0 };
+
+static const uint8_t mask_32_uv[32] = { 64, 64, 64, 64, 64, 64, 64, 64,
+                                        64, 64, 64, 64, 60, 54, 46, 36,
+                                        28, 18, 10, 4,  0,  0,  0,  0,
+                                        0,  0,  0,  0,  0,  0,  0,  0 };
+
+static const uint8_t *get_supertx_mask(int length, int plane) {
+  switch (length) {
+    case 8: return plane ? mask_8_uv : mask_8;
+    case 16: return plane ? mask_16_uv : mask_16;
+    case 32: return plane ? mask_32_uv : mask_32;
+    default: assert(0);
+  }
+  return NULL;
+}
+
+void vp10_build_masked_inter_predictor_complex(
+    MACROBLOCKD *xd, uint8_t *dst, int dst_stride, const uint8_t *pre,
+    int pre_stride, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori,
+    BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition,
+    int plane) {
+  const struct macroblockd_plane *pd = &xd->plane[plane];
+  const int ssx = pd->subsampling_x;
+  const int ssy = pd->subsampling_y;
+  const int top_w = (4 << b_width_log2_lookup[top_bsize]) >> ssx;
+  const int top_h = (4 << b_height_log2_lookup[top_bsize]) >> ssy;
+  const int w = (4 << b_width_log2_lookup[bsize]) >> ssx;
+  const int h = (4 << b_height_log2_lookup[bsize]) >> ssy;
+  const int w_offset = ((mi_col - mi_col_ori) * MI_SIZE) >> ssx;
+  const int h_offset = ((mi_row - mi_row_ori) * MI_SIZE) >> ssy;
+
+  int w_remain, h_remain;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int is_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  assert(bsize <= BLOCK_32X32);
+  assert(IMPLIES(plane == 0, ssx == 0));
+  assert(IMPLIES(plane == 0, ssy == 0));
+
+  switch (partition) {
+    case PARTITION_HORZ: {
+      const uint8_t *const mask = get_supertx_mask(h, ssy);
+
+      w_remain = top_w;
+      h_remain = top_h - h_offset - h;
+      dst += h_offset * dst_stride;
+      pre += h_offset * pre_stride;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (is_hdb)
+        vpx_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre,
+                                   pre_stride, mask, h, top_w, xd->bd);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        vpx_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre, pre_stride,
+                            mask, h, top_w);
+
+      dst += h * dst_stride;
+      pre += h * pre_stride;
+      break;
+    }
+    case PARTITION_VERT: {
+      const uint8_t *const mask = get_supertx_mask(w, ssx);
+
+      w_remain = top_w - w_offset - w;
+      h_remain = top_h;
+      dst += w_offset;
+      pre += w_offset;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (is_hdb)
+        vpx_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre,
+                                   pre_stride, mask, top_h, w, xd->bd);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        vpx_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre, pre_stride,
+                            mask, top_h, w);
+
+      dst += w;
+      pre += w;
+      break;
+    }
+    default: {
+      assert(0);
+      return;
+    }
+  }
+
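+  // Only the strip nearest the partition boundary is blended above; the
+  // remainder of the extended prediction simply overwrites the destination.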
+  if (w_remain == 0 || h_remain == 0) {
+    return;
+  }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (is_hdb) {
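+    // High-bitdepth buffers store uint16_t samples behind uint8_t pointers;
+    // double the strides and copy width so the byte-wise memcpy below moves
+    // whole 16-bit samples.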
+    dst = (uint8_t *)CONVERT_TO_SHORTPTR(dst);
+    pre = (const uint8_t *)CONVERT_TO_SHORTPTR(pre);
+    dst_stride *= 2;
+    pre_stride *= 2;
+    w_remain *= 2;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  do {
+    memcpy(dst, pre, w_remain * sizeof(uint8_t));
+    dst += dst_stride;
+    pre += pre_stride;
+  } while (--h_remain);
+}
+
+void vp10_build_inter_predictors_sb_sub8x8_extend(MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+                                                  int mi_row_ori,
+                                                  int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+                                                  int mi_row, int mi_col,
+                                                  BLOCK_SIZE bsize, int block) {
+  // Prediction function used in supertx:
+  // Uses the MV of the current block (which is smaller than 8x8) to build
+  // the prediction for a block located at (mi_row, mi_col) of size bsize;
+  // bsize can be larger than 8x8.
+  // block (0-3): the sub8x8 position of the current block.
+  int plane;
+  const int mi_x = mi_col * MI_SIZE;
+  const int mi_y = mi_row * MI_SIZE;
+#if CONFIG_EXT_INTER
+  const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
+  const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
+#endif  // CONFIG_EXT_INTER
+
+  // For sub8x8 uv:
+  // in supertx, skip the uv prediction for all blocks but the first
+  // (block == 0).
+  int max_plane = block ? 1 : MAX_MB_PLANE;
+
+  for (plane = 0; plane < max_plane; plane++) {
+    const BLOCK_SIZE plane_bsize =
+        get_plane_block_size(bsize, &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
+
+    build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+                           0, 0,
+#endif  // CONFIG_OBMC
+                           block, bw, bh, 0, 0, bw, bh,
+#if CONFIG_EXT_INTER
+                           wedge_offset_x, wedge_offset_y,
+#endif  // CONFIG_EXT_INTER
+                           mi_x, mi_y);
+  }
+#if CONFIG_EXT_INTER
+  if (is_interintra_pred(&xd->mi[0]->mbmi))
+    vp10_build_interintra_predictors(
+        xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
+        xd->plane[0].dst.stride, xd->plane[1].dst.stride,
+        xd->plane[2].dst.stride, bsize);
+#endif  // CONFIG_EXT_INTER
+}
+
+void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+                                           int mi_row_ori, int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+                                           int mi_row, int mi_col,
+                                           BLOCK_SIZE bsize) {
+  int plane;
+  const int mi_x = mi_col * MI_SIZE;
+  const int mi_y = mi_row * MI_SIZE;
+#if CONFIG_EXT_INTER
+  const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
+  const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
+#endif  // CONFIG_EXT_INTER
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const BLOCK_SIZE plane_bsize =
+        get_plane_block_size(bsize, &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
+
+    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+      int x, y;
+      assert(bsize == BLOCK_8X8);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+          build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+                                 0, 0,
+#endif  // CONFIG_OBMC
+                                 y * 2 + x, bw, bh, 4 * x, 4 * y, 4, 4,
+#if CONFIG_EXT_INTER
+                                 wedge_offset_x, wedge_offset_y,
+#endif  // CONFIG_EXT_INTER
+                                 mi_x, mi_y);
+    } else {
+      build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+                             0, 0,
+#endif  // CONFIG_OBMC
+                             0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_EXT_INTER
+                             wedge_offset_x, wedge_offset_y,
+#endif  // CONFIG_EXT_INTER
+                             mi_x, mi_y);
+    }
+  }
+}
+#endif  // CONFIG_SUPERTX
+
+#if CONFIG_OBMC
+// obmc_mask_N[overlap_position]
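+// Each entry is a 0..64 alpha for vpx_blend_a64_*(): position 0 is the
+// sample nearest the neighboring block, where the neighbor's prediction gets
+// the most weight; the weights rise toward 64 (keep the current block's own
+// prediction) with distance from that edge.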
+static const uint8_t obmc_mask_1[1] = { 55 };
+
+static const uint8_t obmc_mask_2[2] = { 45, 62 };
+
+static const uint8_t obmc_mask_4[4] = { 39, 50, 59, 64 };
+
+static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 63, 64 };
+
+static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
+                                          56, 58, 60, 61, 63, 64, 64, 64 };
+
+static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
+                                          45, 47, 48, 50, 51, 52, 53, 55,
+                                          56, 57, 58, 59, 60, 60, 61, 62,
+                                          62, 63, 63, 64, 64, 64, 64, 64 };
+
+#if CONFIG_EXT_PARTITION
+static const uint8_t obmc_mask_64[64] = {
+  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
+  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
+  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
+  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+};
+#endif  // CONFIG_EXT_PARTITION
+
+const uint8_t *vp10_get_obmc_mask(int length) {
+  switch (length) {
+    case 1: return obmc_mask_1;
+    case 2: return obmc_mask_2;
+    case 4: return obmc_mask_4;
+    case 8: return obmc_mask_8;
+    case 16: return obmc_mask_16;
+    case 32: return obmc_mask_32;
+#if CONFIG_EXT_PARTITION
+    case 64: return obmc_mask_64;
+#endif  // CONFIG_EXT_PARTITION
+    default: assert(0); return NULL;
+  }
+}
+
+// This function combines the motion-compensated predictions generated by the
+// top/left neighboring blocks' inter predictors with the regular inter
+// prediction. It assumes the original prediction (bmc) is stored in
+// xd->plane[].dst.buf.
+void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                      int mi_row, int mi_col,
+                                      uint8_t *above[MAX_MB_PLANE],
+                                      int above_stride[MAX_MB_PLANE],
+                                      uint8_t *left[MAX_MB_PLANE],
+                                      int left_stride[MAX_MB_PLANE]) {
+  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  int plane, i;
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  // handle above row
+  if (xd->up_available) {
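+    // The blended strip extends half the block height (in pixels) down into
+    // the current block.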
+    const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
+    const int miw = VPXMIN(xd->n8_w, cm->mi_cols - mi_col);
+    const int mi_row_offset = -1;
+
+    assert(miw > 0);
+
+    i = 0;
+    do {  // for each mi in the above row
+      const int mi_col_offset = i;
+      const MB_MODE_INFO *const above_mbmi =
+          &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
+      const int mi_step =
+          VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+
+      if (is_neighbor_overlappable(above_mbmi)) {
+        for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+          const struct macroblockd_plane *pd = &xd->plane[plane];
+          const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
+          const int bh = overlap >> pd->subsampling_y;
+          const int dst_stride = pd->dst.stride;
+          uint8_t *const dst = &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x];
+          const int tmp_stride = above_stride[plane];
+          const uint8_t *const tmp =
+              &above[plane][(i * MI_SIZE) >> pd->subsampling_x];
+          const uint8_t *const mask = vp10_get_obmc_mask(bh);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+          if (is_hbd)
+            vpx_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
+                                       tmp_stride, mask, bh, bw, xd->bd);
+          else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+            vpx_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
+                                tmp_stride, mask, bh, bw);
+        }
+      }
+      i += mi_step;
+    } while (i < miw);
+  }
+
+  // handle left column
+  if (xd->left_available) {
+    const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;
+    const int mih = VPXMIN(xd->n8_h, cm->mi_rows - mi_row);
+    const int mi_col_offset = -1;
+
+    assert(mih > 0);
+
+    i = 0;
+    do {  // for each mi in the left column
+      const int mi_row_offset = i;
+      const MB_MODE_INFO *const left_mbmi =
+          &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
+      const int mi_step =
+          VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
+
+      if (is_neighbor_overlappable(left_mbmi)) {
+        for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+          const struct macroblockd_plane *pd = &xd->plane[plane];
+          const int bw = overlap >> pd->subsampling_x;
+          const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
+          const int dst_stride = pd->dst.stride;
+          uint8_t *const dst =
+              &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y];
+          const int tmp_stride = left_stride[plane];
+          const uint8_t *const tmp =
+              &left[plane][(i * MI_SIZE * tmp_stride) >> pd->subsampling_y];
+          const uint8_t *const mask = vp10_get_obmc_mask(bw);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+          if (is_hbd)
+            vpx_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
+                                       tmp_stride, mask, bh, bw, xd->bd);
+          else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+            vpx_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
+                                tmp_stride, mask, bh, bw);
+        }
+      }
+      i += mi_step;
+    } while (i < mih);
+  }
+}
+
+#if CONFIG_EXT_INTER
+void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
+  if (is_interintra_pred(mbmi)) {
+    mbmi->ref_frame[1] = NONE;
+  } else if (has_second_ref(mbmi) && is_interinter_wedge_used(mbmi->sb_type) &&
+             mbmi->use_wedge_interinter) {
+    mbmi->use_wedge_interinter = 0;
+    mbmi->ref_frame[1] = NONE;
+  }
+}
+#endif  // CONFIG_EXT_INTER
+
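+// For each overlappable block in the row above, rebuilds that neighbor's
+// inter prediction over the strip it overlaps at the top of the current
+// block, writing into tmp_buf[] by repointing xd's dst planes at the
+// temporary buffers; xd->mb_to_left_edge is restored before returning.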
+void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                          int mi_row, int mi_col,
+                                          uint8_t *tmp_buf[MAX_MB_PLANE],
+                                          int tmp_width[MAX_MB_PLANE],
+                                          int tmp_height[MAX_MB_PLANE],
+                                          int tmp_stride[MAX_MB_PLANE]) {
+  const TileInfo *const tile = &xd->tile;
+  BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  int i, j, mi_step, ref;
+
+  if (mi_row <= tile->mi_row_start) return;
+
+  for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
+    int mi_row_offset = -1;
+    int mi_col_offset = i;
+    int mi_x, mi_y, bw, bh;
+    MODE_INFO *above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+    MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
+#if CONFIG_EXT_INTER
+    MB_MODE_INFO backup_mbmi;
+#endif  // CONFIG_EXT_INTER
+
+    mi_step = VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+
+    if (!is_neighbor_overlappable(above_mbmi)) continue;
+
+#if CONFIG_EXT_INTER
+    backup_mbmi = *above_mbmi;
+    modify_neighbor_predictor_for_obmc(above_mbmi);
+#endif  // CONFIG_EXT_INTER
+
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      struct macroblockd_plane *const pd = &xd->plane[j];
+      setup_pred_plane(&pd->dst, tmp_buf[j], tmp_width[j], tmp_height[j],
+                       tmp_stride[j], 0, i, NULL, pd->subsampling_x,
+                       pd->subsampling_y);
+    }
+    for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
+      MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
+      RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+      xd->block_refs[ref] = ref_buf;
+      if (!vp10_is_valid_scale(&ref_buf->sf))
+        vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+                           "Reference frame has invalid dimensions");
+      vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
+                            &ref_buf->sf);
+    }
+
+    xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
+    mi_x = (mi_col + i) << MI_SIZE_LOG2;
+    mi_y = mi_row << MI_SIZE_LOG2;
+
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      const struct macroblockd_plane *pd = &xd->plane[j];
+      bw = (mi_step * 8) >> pd->subsampling_x;
+      bh = VPXMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
+                  4);
+
+      if (above_mbmi->sb_type < BLOCK_8X8) {
+        const PARTITION_TYPE bp = BLOCK_8X8 - above_mbmi->sb_type;
+        const int have_vsplit = bp != PARTITION_HORZ;
+        const int have_hsplit = bp != PARTITION_VERT;
+        const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+        const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+        const int pw = 8 >> (have_vsplit | pd->subsampling_x);
+        int x, y;
+
+        for (y = 0; y < num_4x4_h; ++y)
+          for (x = 0; x < num_4x4_w; ++x) {
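+            // The neighbor's top row of sub-blocks does not border the
+            // current block, so its prediction is not needed here.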
+            if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT) && y == 0 &&
+                !pd->subsampling_y)
+              continue;
+
+            build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+                                   y * 2 + x, bw, bh, 4 * x, 0, pw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                   0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                   mi_x, mi_y);
+          }
+      } else {
+        build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, bw, bh,
+                               0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                               0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                               mi_x, mi_y);
+      }
+    }
+#if CONFIG_EXT_INTER
+    *above_mbmi = backup_mbmi;
+#endif  // CONFIG_EXT_INTER
+  }
+  xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+}
+
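+// Counterpart of vp10_build_prediction_by_above_preds for the column of
+// overlappable blocks to the left.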
+void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                         int mi_row, int mi_col,
+                                         uint8_t *tmp_buf[MAX_MB_PLANE],
+                                         int tmp_width[MAX_MB_PLANE],
+                                         int tmp_height[MAX_MB_PLANE],
+                                         int tmp_stride[MAX_MB_PLANE]) {
+  const TileInfo *const tile = &xd->tile;
+  BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  int i, j, mi_step, ref;
+
+  if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start)) return;
+
+  for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
+    int mi_row_offset = i;
+    int mi_col_offset = -1;
+    int mi_x, mi_y, bw, bh;
+    MODE_INFO *left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+    MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
+#if CONFIG_EXT_INTER
+    MB_MODE_INFO backup_mbmi;
+#endif  // CONFIG_EXT_INTER
+
+    mi_step = VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
+
+    if (!is_neighbor_overlappable(left_mbmi)) continue;
+
+#if CONFIG_EXT_INTER
+    backup_mbmi = *left_mbmi;
+    modify_neighbor_predictor_for_obmc(left_mbmi);
+#endif  // CONFIG_EXT_INTER
+
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      struct macroblockd_plane *const pd = &xd->plane[j];
+      setup_pred_plane(&pd->dst, tmp_buf[j], tmp_width[j], tmp_height[j],
+                       tmp_stride[j], i, 0, NULL, pd->subsampling_x,
+                       pd->subsampling_y);
+    }
+    for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
+      MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
+      RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+      xd->block_refs[ref] = ref_buf;
+      if (!vp10_is_valid_scale(&ref_buf->sf))
+        vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+                           "Reference frame has invalid dimensions");
+      vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
+                            &ref_buf->sf);
+    }
+
+    xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
+    mi_x = mi_col << MI_SIZE_LOG2;
+    mi_y = (mi_row + i) << MI_SIZE_LOG2;
+
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      const struct macroblockd_plane *pd = &xd->plane[j];
+      bw = VPXMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
+                  4);
+      bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
+
+      if (left_mbmi->sb_type < BLOCK_8X8) {
+        const PARTITION_TYPE bp = BLOCK_8X8 - left_mbmi->sb_type;
+        const int have_vsplit = bp != PARTITION_HORZ;
+        const int have_hsplit = bp != PARTITION_VERT;
+        const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+        const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+        const int ph = 8 >> (have_hsplit | pd->subsampling_y);
+        int x, y;
+
+        for (y = 0; y < num_4x4_h; ++y)
+          for (x = 0; x < num_4x4_w; ++x) {
+            if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT) && x == 0 &&
+                !pd->subsampling_x)
+              continue;
+
+            build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+                                   y * 2 + x, bw, bh, 0, 4 * y, bw, ph,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                   0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                                   mi_x, mi_y);
+          }
+      } else {
+        build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, bw, bh,
+                               0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                               0, 0,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                               mi_x, mi_y);
+      }
+    }
+#if CONFIG_EXT_INTER
+    *left_mbmi = backup_mbmi;
+#endif  // CONFIG_EXT_INTER
+  }
+  xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
+}
+#endif  // CONFIG_OBMC
+
+#if CONFIG_EXT_INTER
+#if CONFIG_EXT_PARTITION
+static const int ii_weights1d[MAX_SB_SIZE] = {
+  102, 100, 97, 95, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 73, 71, 69, 68, 67,
+  65,  64,  62, 61, 60, 59, 58, 57, 55, 54, 53, 52, 52, 51, 50, 49, 48, 47, 47,
+  46,  45,  45, 44, 43, 43, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 36,
+  36,  36,  35, 35, 35, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 31,
+  31,  31,  31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 28,
+  28,  28,  28, 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 27, 27, 27,
+  27,  27,  27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+};
+static int ii_size_scales[BLOCK_SIZES] = { 32, 16, 16, 16, 8, 8, 8, 4,
+                                           4,  4,  2,  2,  2, 1, 1, 1 };
+#else
+static const int ii_weights1d[MAX_SB_SIZE] = {
+  102, 100, 97, 95, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 73, 71,
+  69,  68,  67, 65, 64, 62, 61, 60, 59, 58, 57, 55, 54, 53, 52, 52,
+  51,  50,  49, 48, 47, 47, 46, 45, 45, 44, 43, 43, 42, 41, 41, 40,
+  40,  39,  39, 38, 38, 38, 37, 37, 36, 36, 36, 35, 35, 35, 34, 34,
+};
+static int ii_size_scales[BLOCK_SIZES] = { 16, 8, 8, 8, 4, 4, 4,
+                                           2,  2, 2, 1, 1, 1 };
+#endif  // CONFIG_EXT_PARTITION
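+// ii_weights1d[] holds alphas for VPX_BLEND_A256(): the weight given to the
+// intra predictor, largest next to the intra reference edge and decaying
+// with distance from it. ii_size_scales[] maps a block size to the stride
+// used to index the table so that every block size traverses the full ramp.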
+
+static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra,
+                               int wedge_index, int wedge_sign,
+                               BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
+                               uint8_t *comppred, int compstride,
+                               const uint8_t *interpred, int interstride,
+                               const uint8_t *intrapred, int intrastride) {
+  const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
+  const int size_scale = ii_size_scales[plane_bsize];
+  int i, j;
+
+  if (use_wedge_interintra) {
+    if (is_interintra_wedge_used(bsize)) {
+      const uint8_t *mask =
+          vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
+      const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
+      const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
+      vpx_blend_a64_mask(
+          comppred, compstride, intrapred, intrastride, interpred, interstride,
+          mask, 4 * num_4x4_blocks_wide_lookup[bsize], bh, bw, subh, subw);
+    }
+    return;
+  }
+
+  switch (mode) {
+    case II_V_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale = ii_weights1d[i * size_scale];
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_H_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale = ii_weights1d[j * size_scale];
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_D63_PRED:
+    case II_D117_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale = (ii_weights1d[i * size_scale] * 3 +
+                       ii_weights1d[j * size_scale]) >>
+                      2;
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_D207_PRED:
+    case II_D153_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale = (ii_weights1d[j * size_scale] * 3 +
+                       ii_weights1d[i * size_scale]) >>
+                      2;
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_D135_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale = ii_weights1d[(i < j ? i : j) * size_scale];
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_D45_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale =
+              (ii_weights1d[i * size_scale] + ii_weights1d[j * size_scale]) >>
+              1;
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_TM_PRED:
+    case II_DC_PRED:
+    default:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          comppred[i * compstride + j] = VPX_BLEND_AVG(
+              intrapred[i * intrastride + j], interpred[i * interstride + j]);
+        }
+      }
+      break;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void combine_interintra_highbd(
+    INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index,
+    int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
+    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
+    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
+  const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
+  const int size_scale = ii_size_scales[plane_bsize];
+  int i, j;
+
+  uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8);
+  const uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8);
+  const uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8);
+
+  if (use_wedge_interintra) {
+    if (is_interintra_wedge_used(bsize)) {
+      const uint8_t *mask =
+          vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
+      const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
+      const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
+      vpx_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
+                                interpred8, interstride, mask, bw, bh, bw, subh,
+                                subw, bd);
+    }
+    return;
+  }
+
+  switch (mode) {
+    case II_V_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale = ii_weights1d[i * size_scale];
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_H_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale = ii_weights1d[j * size_scale];
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_D63_PRED:
+    case II_D117_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale = (ii_weights1d[i * size_scale] * 3 +
+                       ii_weights1d[j * size_scale]) >>
+                      2;
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_D207_PRED:
+    case II_D153_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale = (ii_weights1d[j * size_scale] * 3 +
+                       ii_weights1d[i * size_scale]) >>
+                      2;
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_D135_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale = ii_weights1d[(i < j ? i : j) * size_scale];
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_D45_PRED:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          int scale =
+              (ii_weights1d[i * size_scale] + ii_weights1d[j * size_scale]) >>
+              1;
+          comppred[i * compstride + j] =
+              VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+                             interpred[i * interstride + j]);
+        }
+      }
+      break;
+
+    case II_TM_PRED:
+    case II_DC_PRED:
+    default:
+      for (i = 0; i < bh; ++i) {
+        for (j = 0; j < bw; ++j) {
+          comppred[i * compstride + j] = VPX_BLEND_AVG(
+              interpred[i * interstride + j], intrapred[i * intrastride + j]);
+        }
+      }
+      break;
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+// Breaks the rectangular intra prediction used for joint spatio-temporal
+// prediction down into two square intra predictions.
+static void build_intra_predictors_for_interintra(MACROBLOCKD *xd, uint8_t *ref,
+                                                  int ref_stride, uint8_t *dst,
+                                                  int dst_stride,
+                                                  PREDICTION_MODE mode,
+                                                  BLOCK_SIZE bsize, int plane) {
+  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
+  const int bwl = b_width_log2_lookup[plane_bsize];
+  const int bhl = b_height_log2_lookup[plane_bsize];
+  const int pxbw = 4 << bwl;
+  const int pxbh = 4 << bhl;
+  TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+
+  if (bwl == bhl) {
+    vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride,
+                             dst, dst_stride, 0, 0, plane);
+
+  } else if (bwl < bhl) {
+    uint8_t *src_2 = ref + pxbw * ref_stride;
+    uint8_t *dst_2 = dst + pxbw * dst_stride;
+    vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride,
+                             dst, dst_stride, 0, 0, plane);
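+    // Reuse the bottom row of the upper square's prediction as the
+    // above-neighbor row for predicting the lower square.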
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
+      uint16_t *dst_216 = CONVERT_TO_SHORTPTR(dst_2);
+      memcpy(src_216 - ref_stride, dst_216 - dst_stride,
+             sizeof(*src_216) * pxbw);
+    } else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    {
+      memcpy(src_2 - ref_stride, dst_2 - dst_stride, sizeof(*src_2) * pxbw);
+    }
+    vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, src_2, ref_stride,
+                             dst_2, dst_stride, 0, 1 << bwl, plane);
+  } else {  // bwl > bhl
+    int i;
+    uint8_t *src_2 = ref + pxbh;
+    uint8_t *dst_2 = dst + pxbh;
+    vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride,
+                             dst, dst_stride, 0, 0, plane);
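+    // Reuse the rightmost column of the left square's prediction as the
+    // left-neighbor column for predicting the right square.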
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
+      uint16_t *dst_216 = CONVERT_TO_SHORTPTR(dst_2);
+      for (i = 0; i < pxbh; ++i)
+        src_216[i * ref_stride - 1] = dst_216[i * dst_stride - 1];
+    } else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    {
+      for (i = 0; i < pxbh; ++i)
+        src_2[i * ref_stride - 1] = dst_2[i * dst_stride - 1];
+    }
+    vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, src_2, ref_stride,
+                             dst_2, dst_stride, 1 << bhl, 0, plane);
+  }
+}
+
+// Mapping of interintra to intra mode for use in the intra component
+static const int interintra_to_intra_mode[INTERINTRA_MODES] = {
+  DC_PRED,   V_PRED,    H_PRED,    D45_PRED, D135_PRED,
+  D117_PRED, D153_PRED, D207_PRED, D63_PRED, TM_PRED
+};
+
+void vp10_build_intra_predictors_for_interintra(MACROBLOCKD *xd,
+                                                BLOCK_SIZE bsize, int plane,
+                                                uint8_t *dst, int dst_stride) {
+  build_intra_predictors_for_interintra(
+      xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, dst,
+      dst_stride, interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode],
+      bsize, plane);
+}
+
+void vp10_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
+                             const uint8_t *inter_pred, int inter_stride,
+                             const uint8_t *intra_pred, int intra_stride) {
+  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    combine_interintra_highbd(
+        xd->mi[0]->mbmi.interintra_mode, xd->mi[0]->mbmi.use_wedge_interintra,
+        xd->mi[0]->mbmi.interintra_wedge_index,
+        xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize,
+        xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, inter_pred,
+        inter_stride, intra_pred, intra_stride, xd->bd);
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  combine_interintra(xd->mi[0]->mbmi.interintra_mode,
+                     xd->mi[0]->mbmi.use_wedge_interintra,
+                     xd->mi[0]->mbmi.interintra_wedge_index,
+                     xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize,
+                     xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
+                     inter_pred, inter_stride, intra_pred, intra_stride);
+}
+
+void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
+                                          int ystride, BLOCK_SIZE bsize) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
+    vp10_build_intra_predictors_for_interintra(
+        xd, bsize, 0, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
+    vp10_combine_interintra(xd, bsize, 0, ypred, ystride,
+                            CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  {
+    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
+    vp10_build_intra_predictors_for_interintra(xd, bsize, 0, intrapredictor,
+                                               MAX_SB_SIZE);
+    vp10_combine_interintra(xd, bsize, 0, ypred, ystride, intrapredictor,
+                            MAX_SB_SIZE);
+  }
+}
+
+void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred,
+                                          int ustride, int plane,
+                                          BLOCK_SIZE bsize) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]);
+    vp10_build_intra_predictors_for_interintra(
+        xd, bsize, plane, CONVERT_TO_BYTEPTR(uintrapredictor), MAX_SB_SIZE);
+    vp10_combine_interintra(xd, bsize, plane, upred, ustride,
+                            CONVERT_TO_BYTEPTR(uintrapredictor), MAX_SB_SIZE);
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  {
+    DECLARE_ALIGNED(16, uint8_t, uintrapredictor[MAX_SB_SQUARE]);
+    vp10_build_intra_predictors_for_interintra(xd, bsize, plane,
+                                               uintrapredictor, MAX_SB_SIZE);
+    vp10_combine_interintra(xd, bsize, plane, upred, ustride, uintrapredictor,
+                            MAX_SB_SIZE);
+  }
+}
+
+void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred,
+                                           uint8_t *vpred, int ustride,
+                                           int vstride, BLOCK_SIZE bsize) {
+  vp10_build_interintra_predictors_sbc(xd, upred, ustride, 1, bsize);
+  vp10_build_interintra_predictors_sbc(xd, vpred, vstride, 2, bsize);
+}
+
+void vp10_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred,
+                                      uint8_t *upred, uint8_t *vpred,
+                                      int ystride, int ustride, int vstride,
+                                      BLOCK_SIZE bsize) {
+  vp10_build_interintra_predictors_sby(xd, ypred, ystride, bsize);
+  vp10_build_interintra_predictors_sbuv(xd, upred, vpred, ustride, vstride,
+                                        bsize);
+}
+
+// Builds the inter-predictor for the single ref case
+// for use in the encoder to search the wedges efficiently.
+static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
+                                              int block, int bw, int bh, int x,
+                                              int y, int w, int h, int mi_x,
+                                              int mi_y, int ref,
+                                              uint8_t *const ext_dst,
+                                              int ext_dst_stride) {
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  const MODE_INFO *mi = xd->mi[0];
+
+  const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+  struct buf_2d *const pre_buf = &pd->pre[ref];
+#if CONFIG_VP9_HIGHBITDEPTH
+  uint8_t *const dst =
+      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? CONVERT_TO_BYTEPTR(ext_dst)
+                                                   : ext_dst) +
+      ext_dst_stride * y + x;
+#else
+  uint8_t *const dst = ext_dst + ext_dst_stride * y + x;
+#endif
+  const MV mv = mi->mbmi.sb_type < BLOCK_8X8
+                    ? average_split_mvs(pd, mi, ref, block)
+                    : mi->mbmi.mv[ref].as_mv;
+
+  // TODO(jkoleszar): This clamping is done in the incorrect place for the
+  // scaling case. It needs to be done on the scaled MV, not the pre-scaling
+  // MV. Note however that it performs the subsampling aware scaling so
+  // that the result is always q4.
+  // That is, the mv_precision of the result is MV_PRECISION_Q4.
+  const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, pd->subsampling_x,
+                                             pd->subsampling_y);
+
+  uint8_t *pre;
+  MV32 scaled_mv;
+  int xs, ys, subpel_x, subpel_y;
+  const int is_scaled = vp10_is_scaled(sf);
+
+  if (is_scaled) {
+    pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
+    scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+    xs = sf->x_step_q4;
+    ys = sf->y_step_q4;
+  } else {
+    pre = pre_buf->buf + (y * pre_buf->stride + x);
+    scaled_mv.row = mv_q4.row;
+    scaled_mv.col = mv_q4.col;
+    xs = ys = 16;
+  }
+
+  subpel_x = scaled_mv.col & SUBPEL_MASK;
+  subpel_y = scaled_mv.row & SUBPEL_MASK;
+  pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
+         (scaled_mv.col >> SUBPEL_BITS);
+
+  vp10_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, subpel_x,
+                            subpel_y, sf, w, h, 0, mi->mbmi.interp_filter, xs,
+                            ys, xd);
+}
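
The arithmetic above splits a q4 (1/16-pel) motion vector into an integer
pixel offset and a 4-bit interpolation phase. A minimal standalone sketch of
that split, not part of the patch; SUBPEL_BITS == 4 as in this tree, and the
MV and stride values are hypothetical:

#include <stdio.h>

#define SUBPEL_BITS 4
#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)

int main(void) {
  const int mv_row_q4 = -37; /* hypothetical MV component in 1/16-pel units */
  const int mv_col_q4 = 21;
  const int stride = 640;    /* hypothetical reference-buffer stride */
  /* Arithmetic right shift floors (two's complement, as the codec assumes),
   * so -37 >> 4 == -3 and -37 & 15 == 11. */
  const int offset = (mv_row_q4 >> SUBPEL_BITS) * stride +
                     (mv_col_q4 >> SUBPEL_BITS);
  const int subpel_x = mv_col_q4 & SUBPEL_MASK;
  const int subpel_y = mv_row_q4 & SUBPEL_MASK;
  printf("offset %d, phase (%d, %d)\n", offset, subpel_x, subpel_y);
  return 0;
}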
+
+void vp10_build_inter_predictors_for_planes_single_buf(
+    MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
+    int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]) {
+  int plane;
+  const int mi_x = mi_col * MI_SIZE;
+  const int mi_y = mi_row * MI_SIZE;
+  for (plane = plane_from; plane <= plane_to; ++plane) {
+    const BLOCK_SIZE plane_bsize =
+        get_plane_block_size(bsize, &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
+
+    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+      int x, y;
+      assert(bsize == BLOCK_8X8);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+          build_inter_predictors_single_buf(
+              xd, plane, y * 2 + x, bw, bh, 4 * x, 4 * y, 4, 4, mi_x, mi_y, ref,
+              ext_dst[plane], ext_dst_stride[plane]);
+    } else {
+      build_inter_predictors_single_buf(xd, plane, 0, bw, bh, 0, 0, bw, bh,
+                                        mi_x, mi_y, ref, ext_dst[plane],
+                                        ext_dst_stride[plane]);
+    }
+  }
+}
+
+static void build_wedge_inter_predictor_from_buf(
+    MACROBLOCKD *xd, int plane, int x, int y, int w, int h, uint8_t *ext_dst0,
+    int ext_dst_stride0, uint8_t *ext_dst1, int ext_dst_stride1) {
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const int is_compound = has_second_ref(mbmi);
+  MACROBLOCKD_PLANE *const pd = &xd->plane[plane];
+  struct buf_2d *const dst_buf = &pd->dst;
+  uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
+
+  if (is_compound && is_interinter_wedge_used(mbmi->sb_type) &&
+      mbmi->use_wedge_interinter) {
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+      build_masked_compound_wedge_highbd(
+          dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
+          CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1,
+          mbmi->interinter_wedge_index, mbmi->interinter_wedge_sign,
+          mbmi->sb_type, h, w, xd->bd);
+    else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+      build_masked_compound_wedge(
+          dst, dst_buf->stride, ext_dst0, ext_dst_stride0, ext_dst1,
+          ext_dst_stride1, mbmi->interinter_wedge_index,
+          mbmi->interinter_wedge_sign, mbmi->sb_type, h, w);
+  } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+      vpx_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
+                               dst, dst_buf->stride, NULL, 0, NULL, 0, w, h,
+                               xd->bd);
+    else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+      vpx_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL,
+                        0, NULL, 0, w, h);
+  }
+}
+
+void vp10_build_wedge_inter_predictor_from_buf(
+    MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to,
+    uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3],
+    int ext_dst_stride1[3]) {
+  int plane;
+  for (plane = plane_from; plane <= plane_to; ++plane) {
+    const BLOCK_SIZE plane_bsize =
+        get_plane_block_size(bsize, &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+
+    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+      int x, y;
+      assert(bsize == BLOCK_8X8);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+          build_wedge_inter_predictor_from_buf(
+              xd, plane, 4 * x, 4 * y, 4, 4, ext_dst0[plane],
+              ext_dst_stride0[plane], ext_dst1[plane], ext_dst_stride1[plane]);
+    } else {
+      const int bw = 4 * num_4x4_w;
+      const int bh = 4 * num_4x4_h;
+      build_wedge_inter_predictor_from_buf(
+          xd, plane, 0, 0, bw, bh, ext_dst0[plane], ext_dst_stride0[plane],
+          ext_dst1[plane], ext_dst_stride1[plane]);
+    }
+  }
+}
+#endif  // CONFIG_EXT_INTER
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
new file mode 100644
index 0000000..092926d
--- /dev/null
+++ b/av1/common/reconinter.h
@@ -0,0 +1,596 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_RECONINTER_H_
+#define VP10_COMMON_RECONINTER_H_
+
+#include "av1/common/filter.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/vp10_convolve.h"
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static INLINE void inter_predictor(const uint8_t *src, int src_stride,
+                                   uint8_t *dst, int dst_stride,
+                                   const int subpel_x, const int subpel_y,
+                                   const struct scale_factors *sf, int w, int h,
+                                   int ref_idx,
+#if CONFIG_DUAL_FILTER
+                                   const INTERP_FILTER *interp_filter,
+#else
+                                   const INTERP_FILTER interp_filter,
+#endif
+                                   int xs, int ys) {
+#if CONFIG_DUAL_FILTER
+  InterpFilterParams interp_filter_params_x =
+      vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+  InterpFilterParams interp_filter_params_y =
+      vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
+#else
+  InterpFilterParams interp_filter_params =
+      vp10_get_interp_filter_params(interp_filter);
+#endif
+
+#if CONFIG_DUAL_FILTER
+  if (interp_filter_params_x.taps == SUBPEL_TAPS &&
+      interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) {
+    const int16_t *kernel_x =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x);
+    const int16_t *kernel_y =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y);
+#else
+  if (interp_filter_params.taps == SUBPEL_TAPS) {
+    const int16_t *kernel_x =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x);
+    const int16_t *kernel_y =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_y);
+#endif
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+    if (IsInterpolatingFilter(interp_filter)) {
+      // Interpolating filter
+      sf->predict[subpel_x != 0][subpel_y != 0][ref_idx](
+          src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
+    } else {
+      sf->predict_ni[subpel_x != 0][subpel_y != 0][ref_idx](
+          src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
+    }
+#else
+    sf->predict[subpel_x != 0][subpel_y != 0][ref_idx](
+        src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+  } else {
+    // ref_idx > 0 means this is the second reference frame: the first
+    // reference frame's prediction is already in dst, so the two results
+    // must be averaged.
+    vp10_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
+                  subpel_x, xs, subpel_y, ys, ref_idx);
+  }
+}
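
As the comment notes, the convolve fallback for ref_idx > 0 averages the
second prediction into dst. A sketch of that rounded-mean combine with a
hypothetical helper, assuming the usual (a + b + 1) >> 1 rounding:

#include <stdint.h>

static void average_into_dst(uint8_t *dst, int dst_stride,
                             const uint8_t *second, int second_stride,
                             int w, int h) {
  int r, c;
  for (r = 0; r < h; ++r)
    for (c = 0; c < w; ++c)
      dst[r * dst_stride + c] =
          (uint8_t)((dst[r * dst_stride + c] +
                     second[r * second_stride + c] + 1) >> 1);
}

int main(void) {
  uint8_t dst[4] = { 10, 10, 10, 10 };
  const uint8_t second[4] = { 20, 21, 22, 23 };
  average_into_dst(dst, 4, second, 4, 4, 1);
  return dst[0] == 15 ? 0 : 1; /* (10 + 20 + 1) >> 1 == 15 */
}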
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
+                                          uint8_t *dst, int dst_stride,
+                                          const int subpel_x,
+                                          const int subpel_y,
+                                          const struct scale_factors *sf, int w,
+                                          int h, int ref,
+#if CONFIG_DUAL_FILTER
+                                          const INTERP_FILTER *interp_filter,
+#else
+                                          const INTERP_FILTER interp_filter,
+#endif
+                                          int xs, int ys, int bd) {
+#if CONFIG_DUAL_FILTER
+  InterpFilterParams interp_filter_params_x =
+      vp10_get_interp_filter_params(interp_filter[1 + 2 * ref]);
+  InterpFilterParams interp_filter_params_y =
+      vp10_get_interp_filter_params(interp_filter[0 + 2 * ref]);
+#else
+  InterpFilterParams interp_filter_params =
+      vp10_get_interp_filter_params(interp_filter);
+#endif
+
+#if CONFIG_DUAL_FILTER
+  if (interp_filter_params_x.taps == SUBPEL_TAPS &&
+      interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) {
+    const int16_t *kernel_x =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x);
+    const int16_t *kernel_y =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y);
+#else
+  if (interp_filter_params.taps == SUBPEL_TAPS) {
+    const int16_t *kernel_x =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x);
+    const int16_t *kernel_y =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_y);
+#endif  // CONFIG_DUAL_FILTER
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+    if (IsInterpolatingFilter(interp_filter)) {
+      // Interpolating filter
+      sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
+          src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h,
+          bd);
+    } else {
+      sf->highbd_predict_ni[subpel_x != 0][subpel_y != 0][ref](
+          src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h,
+          bd);
+    }
+#else
+    sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
+        src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h, bd);
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+  } else {
+    // ref > 0 means this is the second reference frame: the first
+    // reference frame's prediction is already in dst, so the two results
+    // must be averaged.
+    int avg = ref > 0;
+    vp10_highbd_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
+                         subpel_x, xs, subpel_y, ys, avg, bd);
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_EXT_INTER
+// Set to one to use larger codebooks
+#define USE_LARGE_WEDGE_CODEBOOK 0
+
+#if USE_LARGE_WEDGE_CODEBOOK
+#define MAX_WEDGE_TYPES (1 << 5)
+#else
+#define MAX_WEDGE_TYPES (1 << 4)
+#endif
+
+#define MAX_WEDGE_SIZE_LOG2 5  // 32x32
+#define MAX_WEDGE_SIZE (1 << MAX_WEDGE_SIZE_LOG2)
+#define MAX_WEDGE_SQUARE (MAX_WEDGE_SIZE * MAX_WEDGE_SIZE)
+
+#define WEDGE_WEIGHT_BITS 6
+
+#define WEDGE_NONE -1
+
+// Angles are measured anti-clockwise with respect to the horizontal.
+typedef enum {
+  WEDGE_HORIZONTAL = 0,
+  WEDGE_VERTICAL = 1,
+  WEDGE_OBLIQUE27 = 2,
+  WEDGE_OBLIQUE63 = 3,
+  WEDGE_OBLIQUE117 = 4,
+  WEDGE_OBLIQUE153 = 5,
+  WEDGE_DIRECTIONS
+} WedgeDirectionType;
+
+// 3-tuple: {direction, x_offset, y_offset}
+typedef struct {
+  WedgeDirectionType direction;
+  int x_offset;
+  int y_offset;
+} wedge_code_type;
+
+typedef uint8_t *wedge_masks_type[MAX_WEDGE_TYPES];
+
+typedef struct {
+  int bits;
+  const wedge_code_type *codebook;
+  uint8_t *signflip;
+  int smoother;
+  wedge_masks_type *masks;
+} wedge_params_type;
+
+extern const wedge_params_type wedge_params_lookup[BLOCK_SIZES];
+
+static INLINE int get_wedge_bits_lookup(BLOCK_SIZE sb_type) {
+  return wedge_params_lookup[sb_type].bits;
+}
+
+static INLINE int is_interinter_wedge_used(BLOCK_SIZE sb_type) {
+  return wedge_params_lookup[sb_type].bits > 0;
+}
+
+static INLINE int get_interinter_wedge_bits(BLOCK_SIZE sb_type) {
+  const int wbits = wedge_params_lookup[sb_type].bits;
+  return (wbits > 0) ? wbits + 1 : 0;
+}
+
+static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) {
+  return wedge_params_lookup[sb_type].bits > 0;
+}
+
+static INLINE int get_interintra_wedge_bits(BLOCK_SIZE sb_type) {
+  return wedge_params_lookup[sb_type].bits;
+}
+#endif  // CONFIG_EXT_INTER
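
The helpers above imply the wedge signaling cost: the codebook index costs
wedge_params_lookup[sb_type].bits bits, and an inter-inter wedge adds one
sign bit, which is why get_interinter_wedge_bits() returns wbits + 1. A tiny
sketch of that accounting with a hypothetical 4-bit codebook:

#include <assert.h>

static int interinter_wedge_bits(int codebook_bits) {
  /* Index bits plus one sign bit, but only when wedges are supported. */
  return codebook_bits > 0 ? codebook_bits + 1 : 0;
}

int main(void) {
  assert(interinter_wedge_bits(4) == 5); /* 16-entry codebook + sign bit */
  assert(interinter_wedge_bits(0) == 0); /* block size without wedges */
  return 0;
}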
+
+void build_inter_predictors(MACROBLOCKD *xd, int plane,
+#if CONFIG_OBMC
+                            int mi_col_offset, int mi_row_offset,
+#endif  // CONFIG_OBMC
+                            int block, int bw, int bh, int x, int y, int w,
+                            int h,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+                            int wedge_offset_x, int wedge_offset_y,
+#endif  // CONFIG_SUPERTX && CONFIG_EXT_INTER
+                            int mi_x, int mi_y);
+
+static INLINE void vp10_make_inter_predictor(
+    const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
+    const int subpel_x, const int subpel_y, const struct scale_factors *sf,
+    int w, int h, int ref,
+#if CONFIG_DUAL_FILTER
+    const INTERP_FILTER *interp_filter,
+#else
+    const INTERP_FILTER interp_filter,
+#endif
+    int xs, int ys, const MACROBLOCKD *xd) {
+  (void)xd;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+    highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
+                           sf, w, h, ref, interp_filter, xs, ys, xd->bd);
+  else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
+                    h, ref, interp_filter, xs, ys);
+}
+
+#if CONFIG_EXT_INTER
+void vp10_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
+                                      uint8_t *dst, int dst_stride,
+                                      const int subpel_x, const int subpel_y,
+                                      const struct scale_factors *sf, int w,
+                                      int h,
+#if CONFIG_DUAL_FILTER
+                                      const INTERP_FILTER *interp_filter,
+#else
+                                      const INTERP_FILTER interp_filter,
+#endif
+                                      int xs, int ys,
+#if CONFIG_SUPERTX
+                                      int wedge_offset_x, int wedge_offset_y,
+#endif  // CONFIG_SUPERTX
+                                      const MACROBLOCKD *xd);
+#endif  // CONFIG_EXT_INTER
+
+static INLINE int round_mv_comp_q4(int value) {
+  return (value < 0 ? value - 2 : value + 2) / 4;
+}
+
+static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) {
+  MV res = {
+    round_mv_comp_q4(
+        mi->bmi[0].as_mv[idx].as_mv.row + mi->bmi[1].as_mv[idx].as_mv.row +
+        mi->bmi[2].as_mv[idx].as_mv.row + mi->bmi[3].as_mv[idx].as_mv.row),
+    round_mv_comp_q4(
+        mi->bmi[0].as_mv[idx].as_mv.col + mi->bmi[1].as_mv[idx].as_mv.col +
+        mi->bmi[2].as_mv[idx].as_mv.col + mi->bmi[3].as_mv[idx].as_mv.col)
+  };
+  return res;
+}
+
+static INLINE int round_mv_comp_q2(int value) {
+  return (value < 0 ? value - 1 : value + 1) / 2;
+}
+
+static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) {
+  MV res = { round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.row +
+                              mi->bmi[block1].as_mv[idx].as_mv.row),
+             round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.col +
+                              mi->bmi[block1].as_mv[idx].as_mv.col) };
+  return res;
+}
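
Both helpers divide a sum of MV components with rounding away from zero, so
positive and negative motion rounds symmetrically. A few worked values as a
standalone check:

#include <assert.h>

static int round_q4(int v) { return (v < 0 ? v - 2 : v + 2) / 4; }
static int round_q2(int v) { return (v < 0 ? v - 1 : v + 1) / 2; }

int main(void) {
  assert(round_q4(6) == 2 && round_q4(-6) == -2); /* symmetric */
  assert(round_q4(5) == 1 && round_q4(-5) == -1); /* C division truncates */
  assert(round_q2(3) == 2 && round_q2(-3) == -2);
  return 0;
}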
+
+// TODO(jkoleszar): yet another mv clamping function :-(
+static INLINE MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd,
+                                           const MV *src_mv, int bw, int bh,
+                                           int ss_x, int ss_y) {
+  // If the MV points so far into the UMV border that no visible pixels
+  // are used for reconstruction, the subpel part of the MV can be
+  // discarded and the MV limited to 16 pixels with equivalent results.
+  const int spel_left = (VPX_INTERP_EXTEND + bw) << SUBPEL_BITS;
+  const int spel_right = spel_left - SUBPEL_SHIFTS;
+  const int spel_top = (VPX_INTERP_EXTEND + bh) << SUBPEL_BITS;
+  const int spel_bottom = spel_top - SUBPEL_SHIFTS;
+  MV clamped_mv = { src_mv->row * (1 << (1 - ss_y)),
+                    src_mv->col * (1 << (1 - ss_x)) };
+  assert(ss_x <= 1);
+  assert(ss_y <= 1);
+
+  clamp_mv(&clamped_mv, xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left,
+           xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right,
+           xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top,
+           xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom);
+
+  return clamped_mv;
+}
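
The clamp bounds above scale with the block size. A sketch of the bound
computation; SUBPEL_BITS == 4 as in this tree, and VPX_INTERP_EXTEND == 4 is
an assumed value here:

#include <stdio.h>

#define SUBPEL_BITS 4
#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
#define INTERP_EXTEND 4 /* assumed stand-in for VPX_INTERP_EXTEND */

int main(void) {
  const int bw = 8; /* hypothetical block width in pixels */
  const int spel_left = (INTERP_EXTEND + bw) << SUBPEL_BITS;
  const int spel_right = spel_left - SUBPEL_SHIFTS;
  /* (4 + 8) << 4 = 192 sixteenth-pels, i.e. 12 pixels past the edge. */
  printf("spel_left %d, spel_right %d\n", spel_left, spel_right);
  return 0;
}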
+
+static INLINE MV average_split_mvs(const struct macroblockd_plane *pd,
+                                   const MODE_INFO *mi, int ref, int block) {
+  const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0);
+  MV res = { 0, 0 };
+  switch (ss_idx) {
+    case 0: res = mi->bmi[block].as_mv[ref].as_mv; break;
+    case 1: res = mi_mv_pred_q2(mi, ref, block, block + 2); break;
+    case 2: res = mi_mv_pred_q2(mi, ref, block, block + 1); break;
+    case 3: res = mi_mv_pred_q4(mi, ref); break;
+    default: assert(ss_idx <= 3 && ss_idx >= 0);
+  }
+  return res;
+}
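
The switch keys on a two-bit subsampling index: 4:2:0 chroma
(ss_x = ss_y = 1) yields index 3 and averages all four sub-block MVs, while
ss_x = 1, ss_y = 0 yields index 2 and averages the horizontal pair. A sketch
of the index computation:

#include <assert.h>

static int ss_index(int ss_x, int ss_y) {
  return ((ss_x > 0) << 1) | (ss_y > 0);
}

int main(void) {
  assert(ss_index(0, 0) == 0); /* no subsampling: use the sub-block MV */
  assert(ss_index(0, 1) == 1); /* vertical subsampling: average blocks 0, 2 */
  assert(ss_index(1, 0) == 2); /* horizontal subsampling: average 0, 1 */
  assert(ss_index(1, 1) == 3); /* 4:2:0: q4 average of all four */
  return 0;
}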
+
+void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, int i,
+                                       int ir, int ic, int mi_row, int mi_col);
+
+void vp10_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                     BLOCK_SIZE bsize);
+
+void vp10_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                     BLOCK_SIZE bsize, int plane);
+
+void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                      BLOCK_SIZE bsize);
+
+void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                    BLOCK_SIZE bsize);
+
+#if CONFIG_SUPERTX
+void vp10_build_inter_predictors_sb_sub8x8_extend(MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+                                                  int mi_row_ori,
+                                                  int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+                                                  int mi_row, int mi_col,
+                                                  BLOCK_SIZE bsize, int block);
+
+void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+                                           int mi_row_ori, int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+                                           int mi_row, int mi_col,
+                                           BLOCK_SIZE bsize);
+struct macroblockd_plane;
+void vp10_build_masked_inter_predictor_complex(
+    MACROBLOCKD *xd, uint8_t *dst, int dst_stride, const uint8_t *pre,
+    int pre_stride, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori,
+    BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition,
+    int plane);
+#endif  // CONFIG_SUPERTX
+
+void vp10_build_inter_predictor(const uint8_t *src, int src_stride,
+                                uint8_t *dst, int dst_stride, const MV *mv_q3,
+                                const struct scale_factors *sf, int w, int h,
+                                int do_avg,
+#if CONFIG_DUAL_FILTER
+                                const INTERP_FILTER *interp_filter,
+#else
+                                const INTERP_FILTER interp_filter,
+#endif
+                                enum mv_precision precision, int x, int y);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_build_inter_predictor(
+    const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
+    const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg,
+#if CONFIG_DUAL_FILTER
+    const INTERP_FILTER *interp_filter,
+#else
+    const INTERP_FILTER interp_filter,
+#endif
+    enum mv_precision precision, int x, int y, int bd);
+#endif
+
+static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride,
+                                       const struct scale_factors *sf) {
+  const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset;
+  const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset;
+  return y * stride + x;
+}
+
+static INLINE void setup_pred_plane(struct buf_2d *dst, uint8_t *src, int width,
+                                    int height, int stride, int mi_row,
+                                    int mi_col,
+                                    const struct scale_factors *scale,
+                                    int subsampling_x, int subsampling_y) {
+  const int x = (MI_SIZE * mi_col) >> subsampling_x;
+  const int y = (MI_SIZE * mi_row) >> subsampling_y;
+  dst->buf = src + scaled_buffer_offset(x, y, stride, scale);
+  dst->buf0 = src;
+  dst->width = width;
+  dst->height = height;
+  dst->stride = stride;
+}
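
setup_pred_plane() anchors the plane buffer at the block's pixel position,
shifted by the plane's chroma subsampling. A worked example, assuming
MI_SIZE == 8 as in this tree and a hypothetical block position:

#include <stdio.h>

#define MI_SIZE 8 /* mode-info unit in pixels, as defined in this tree */

int main(void) {
  const int mi_row = 3, mi_col = 5; /* hypothetical block position */
  const int ss_x = 1, ss_y = 1;     /* 4:2:0 chroma plane */
  const int x = (MI_SIZE * mi_col) >> ss_x; /* 40 >> 1 == 20 */
  const int y = (MI_SIZE * mi_row) >> ss_y; /* 24 >> 1 == 12 */
  /* With no scaling (sf == NULL) the buffer offset is y * stride + x. */
  printf("chroma plane origin (%d, %d)\n", x, y);
  return 0;
}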
+
+void vp10_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
+                           const YV12_BUFFER_CONFIG *src, int mi_row,
+                           int mi_col);
+
+void vp10_setup_pre_planes(MACROBLOCKD *xd, int idx,
+                           const YV12_BUFFER_CONFIG *src, int mi_row,
+                           int mi_col, const struct scale_factors *sf);
+
+#if CONFIG_DUAL_FILTER
+// Detect whether the block has sub-pixel-level motion vectors, checked
+// per component.
+static INLINE int has_subpel_mv_component(const MODE_INFO *const mi,
+                                          const MACROBLOCKD *const xd,
+                                          int dir) {
+  const MB_MODE_INFO *const mbmi = &mi->mbmi;
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  int plane;
+  int ref = (dir >> 1);
+
+  if (bsize >= BLOCK_8X8) {
+    if (dir & 0x01) {
+      if (mbmi->mv[ref].as_mv.col & SUBPEL_MASK) return 1;
+    } else {
+      if (mbmi->mv[ref].as_mv.row & SUBPEL_MASK) return 1;
+    }
+  } else {
+    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+      const PARTITION_TYPE bp = BLOCK_8X8 - bsize;
+      const struct macroblockd_plane *const pd = &xd->plane[plane];
+      const int have_vsplit = bp != PARTITION_HORZ;
+      const int have_hsplit = bp != PARTITION_VERT;
+      const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+      const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+
+      int x, y;
+      for (y = 0; y < num_4x4_h; ++y) {
+        for (x = 0; x < num_4x4_w; ++x) {
+          const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
+          if (dir & 0x01) {
+            if (mv.col & SUBPEL_MASK) return 1;
+          } else {
+            if (mv.row & SUBPEL_MASK) return 1;
+          }
+        }
+      }
+    }
+  }
+
+  return 0;
+}
+#endif
+
+#if CONFIG_EXT_INTERP
+static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) {
+  MODE_INFO *const mi = xd->mi[0];
+  MB_MODE_INFO *const mbmi = &mi->mbmi;
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  const int is_compound = has_second_ref(mbmi);
+  int intpel_mv = 1;
+  int plane;
+
+#if SUPPORT_NONINTERPOLATING_FILTERS
+  // TODO(debargha): This is currently only for experimentation
+  // with non-interpolating filters. Remove later.
+  // If any of the filters are non-interpolating, then always indicate the
+  // interpolation filter.
+  int i;
+  for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+    if (!IsInterpolatingFilter(i)) return 1;
+  }
+#endif
+
+  // For scaled references, the interpolation filter is always signaled.
+  if (vp10_is_scaled(&xd->block_refs[0]->sf)) return 1;
+  if (is_compound && vp10_is_scaled(&xd->block_refs[1]->sf)) return 1;
+
+  if (bsize < BLOCK_8X8) {
+    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+      const PARTITION_TYPE bp = BLOCK_8X8 - bsize;
+      const struct macroblockd_plane *const pd = &xd->plane[plane];
+      const int have_vsplit = bp != PARTITION_HORZ;
+      const int have_hsplit = bp != PARTITION_VERT;
+      const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+      const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+      int ref;
+      for (ref = 0; ref < 1 + is_compound; ++ref) {
+        int x, y;
+        for (y = 0; y < num_4x4_h; ++y)
+          for (x = 0; x < num_4x4_w; ++x) {
+            const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
+            if (mv_has_subpel(&mv)) return 1;
+          }
+      }
+    }
+    return 0;
+  } else {
+    intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
+    if (is_compound && intpel_mv) {
+      intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
+    }
+  }
+  return !intpel_mv;
+}
+#endif  // CONFIG_EXT_INTERP
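
A compact restatement of the rule implemented above: the interpolation
filter is coded only when it can affect the prediction, i.e. when a
reference is scaled or some MV has a sub-pel component. A sketch with a
stand-in MV type; the 1/8-pel mask value 7 is an assumption here:

typedef struct { int row, col; } SketchMV; /* stand-in for the codec's MV */

static int sketch_mv_has_subpel(const SketchMV *mv, int subpel_mask) {
  return (mv->row & subpel_mask) || (mv->col & subpel_mask);
}

static int interp_filter_needed(int any_ref_scaled, const SketchMV *mvs,
                                int n, int subpel_mask) {
  int i;
  if (any_ref_scaled) return 1; /* scaled refs always signal the filter */
  for (i = 0; i < n; ++i)
    if (sketch_mv_has_subpel(&mvs[i], subpel_mask)) return 1;
  return 0; /* all MVs full-pel: the filter choice cannot matter */
}

int main(void) {
  const SketchMV mvs[2] = { { 8, -16 }, { 0, 24 } }; /* all full-pel */
  return interp_filter_needed(0, mvs, 2, 7) == 0 ? 0 : 1;
}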
+
+#if CONFIG_OBMC
+const uint8_t *vp10_get_obmc_mask(int length);
+void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                      int mi_row, int mi_col,
+                                      uint8_t *above[MAX_MB_PLANE],
+                                      int above_stride[MAX_MB_PLANE],
+                                      uint8_t *left[MAX_MB_PLANE],
+                                      int left_stride[MAX_MB_PLANE]);
+void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                          int mi_row, int mi_col,
+                                          uint8_t *tmp_buf[MAX_MB_PLANE],
+                                          int tmp_width[MAX_MB_PLANE],
+                                          int tmp_height[MAX_MB_PLANE],
+                                          int tmp_stride[MAX_MB_PLANE]);
+void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                         int mi_row, int mi_col,
+                                         uint8_t *tmp_buf[MAX_MB_PLANE],
+                                         int tmp_width[MAX_MB_PLANE],
+                                         int tmp_height[MAX_MB_PLANE],
+                                         int tmp_stride[MAX_MB_PLANE]);
+#endif  // CONFIG_OBMC
+
+#if CONFIG_EXT_INTER
+#define MASK_MASTER_SIZE (2 * MAX_SB_SIZE)
+#define MASK_MASTER_STRIDE (2 * MAX_SB_SIZE)
+
+void vp10_init_wedge_masks(void);
+
+static INLINE const uint8_t *vp10_get_contiguous_soft_mask(int wedge_index,
+                                                           int wedge_sign,
+                                                           BLOCK_SIZE sb_type) {
+  return wedge_params_lookup[sb_type].masks[wedge_sign][wedge_index];
+}
+
+const uint8_t *vp10_get_soft_mask(int wedge_index, int wedge_sign,
+                                  BLOCK_SIZE sb_type, int wedge_offset_x,
+                                  int wedge_offset_y);
+
+void vp10_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred,
+                                      uint8_t *upred, uint8_t *vpred,
+                                      int ystride, int ustride, int vstride,
+                                      BLOCK_SIZE bsize);
+void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
+                                          int ystride, BLOCK_SIZE bsize);
+void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred,
+                                          int ustride, int plane,
+                                          BLOCK_SIZE bsize);
+void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred,
+                                           uint8_t *vpred, int ustride,
+                                           int vstride, BLOCK_SIZE bsize);
+
+void vp10_build_intra_predictors_for_interintra(MACROBLOCKD *xd,
+                                                BLOCK_SIZE bsize, int plane,
+                                                uint8_t *intra_pred,
+                                                int intra_stride);
+void vp10_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
+                             const uint8_t *inter_pred, int inter_stride,
+                             const uint8_t *intra_pred, int intra_stride);
+
+// Encoder only
+void vp10_build_inter_predictors_for_planes_single_buf(
+    MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
+    int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]);
+void vp10_build_wedge_inter_predictor_from_buf(
+    MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to,
+    uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3],
+    int ext_dst_stride1[3]);
+#endif  // CONFIG_EXT_INTER
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_RECONINTER_H_
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
new file mode 100644
index 0000000..801f61e
--- /dev/null
+++ b/av1/common/reconintra.c
@@ -0,0 +1,1574 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "aom_ports/system_state.h"
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "aom_dsp/vpx_dsp_common.h"
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/vpx_once.h"
+#if CONFIG_EXT_INTRA
+#include "av1/common/intra_filters.h"
+#endif
+#include "av1/common/reconintra.h"
+#include "av1/common/onyxc_int.h"
+
+enum {
+  NEED_LEFT = 1 << 1,
+  NEED_ABOVE = 1 << 2,
+  NEED_ABOVERIGHT = 1 << 3,
+  NEED_ABOVELEFT = 1 << 4,
+  NEED_BOTTOMLEFT = 1 << 5,
+};
+
+static const uint8_t extend_modes[INTRA_MODES] = {
+  NEED_ABOVE | NEED_LEFT,                   // DC
+  NEED_ABOVE,                               // V
+  NEED_LEFT,                                // H
+  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D117
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D153
+  NEED_LEFT | NEED_BOTTOMLEFT,              // D207
+  NEED_ABOVE | NEED_ABOVERIGHT,             // D63
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // TM
+};
+
+static const uint8_t orders_128x128[1] = { 0 };
+static const uint8_t orders_128x64[2] = { 0, 1 };
+static const uint8_t orders_64x128[2] = { 0, 1 };
+static const uint8_t orders_64x64[4] = {
+  0, 1, 2, 3,
+};
+static const uint8_t orders_64x32[8] = {
+  0, 2, 1, 3, 4, 6, 5, 7,
+};
+static const uint8_t orders_32x64[8] = {
+  0, 1, 2, 3, 4, 5, 6, 7,
+};
+static const uint8_t orders_32x32[16] = {
+  0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15,
+};
+static const uint8_t orders_32x16[32] = {
+  0,  2,  8,  10, 1,  3,  9,  11, 4,  6,  12, 14, 5,  7,  13, 15,
+  16, 18, 24, 26, 17, 19, 25, 27, 20, 22, 28, 30, 21, 23, 29, 31,
+};
+static const uint8_t orders_16x32[32] = {
+  0,  1,  2,  3,  8,  9,  10, 11, 4,  5,  6,  7,  12, 13, 14, 15,
+  16, 17, 18, 19, 24, 25, 26, 27, 20, 21, 22, 23, 28, 29, 30, 31,
+};
+static const uint8_t orders_16x16[64] = {
+  0,  1,  4,  5,  16, 17, 20, 21, 2,  3,  6,  7,  18, 19, 22, 23,
+  8,  9,  12, 13, 24, 25, 28, 29, 10, 11, 14, 15, 26, 27, 30, 31,
+  32, 33, 36, 37, 48, 49, 52, 53, 34, 35, 38, 39, 50, 51, 54, 55,
+  40, 41, 44, 45, 56, 57, 60, 61, 42, 43, 46, 47, 58, 59, 62, 63,
+};
+
+#if CONFIG_EXT_PARTITION
+static const uint8_t orders_16x8[128] = {
+  0,  2,  8,  10, 32,  34,  40,  42,  1,  3,  9,  11, 33,  35,  41,  43,
+  4,  6,  12, 14, 36,  38,  44,  46,  5,  7,  13, 15, 37,  39,  45,  47,
+  16, 18, 24, 26, 48,  50,  56,  58,  17, 19, 25, 27, 49,  51,  57,  59,
+  20, 22, 28, 30, 52,  54,  60,  62,  21, 23, 29, 31, 53,  55,  61,  63,
+  64, 66, 72, 74, 96,  98,  104, 106, 65, 67, 73, 75, 97,  99,  105, 107,
+  68, 70, 76, 78, 100, 102, 108, 110, 69, 71, 77, 79, 101, 103, 109, 111,
+  80, 82, 88, 90, 112, 114, 120, 122, 81, 83, 89, 91, 113, 115, 121, 123,
+  84, 86, 92, 94, 116, 118, 124, 126, 85, 87, 93, 95, 117, 119, 125, 127,
+};
+static const uint8_t orders_8x16[128] = {
+  0,  1,  2,  3,  8,  9,  10, 11, 32,  33,  34,  35,  40,  41,  42,  43,
+  4,  5,  6,  7,  12, 13, 14, 15, 36,  37,  38,  39,  44,  45,  46,  47,
+  16, 17, 18, 19, 24, 25, 26, 27, 48,  49,  50,  51,  56,  57,  58,  59,
+  20, 21, 22, 23, 28, 29, 30, 31, 52,  53,  54,  55,  60,  61,  62,  63,
+  64, 65, 66, 67, 72, 73, 74, 75, 96,  97,  98,  99,  104, 105, 106, 107,
+  68, 69, 70, 71, 76, 77, 78, 79, 100, 101, 102, 103, 108, 109, 110, 111,
+  80, 81, 82, 83, 88, 89, 90, 91, 112, 113, 114, 115, 120, 121, 122, 123,
+  84, 85, 86, 87, 92, 93, 94, 95, 116, 117, 118, 119, 124, 125, 126, 127,
+};
+static const uint8_t orders_8x8[256] = {
+  0,   1,   4,   5,   16,  17,  20,  21,  64,  65,  68,  69,  80,  81,  84,
+  85,  2,   3,   6,   7,   18,  19,  22,  23,  66,  67,  70,  71,  82,  83,
+  86,  87,  8,   9,   12,  13,  24,  25,  28,  29,  72,  73,  76,  77,  88,
+  89,  92,  93,  10,  11,  14,  15,  26,  27,  30,  31,  74,  75,  78,  79,
+  90,  91,  94,  95,  32,  33,  36,  37,  48,  49,  52,  53,  96,  97,  100,
+  101, 112, 113, 116, 117, 34,  35,  38,  39,  50,  51,  54,  55,  98,  99,
+  102, 103, 114, 115, 118, 119, 40,  41,  44,  45,  56,  57,  60,  61,  104,
+  105, 108, 109, 120, 121, 124, 125, 42,  43,  46,  47,  58,  59,  62,  63,
+  106, 107, 110, 111, 122, 123, 126, 127, 128, 129, 132, 133, 144, 145, 148,
+  149, 192, 193, 196, 197, 208, 209, 212, 213, 130, 131, 134, 135, 146, 147,
+  150, 151, 194, 195, 198, 199, 210, 211, 214, 215, 136, 137, 140, 141, 152,
+  153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, 138, 139, 142, 143,
+  154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, 160, 161, 164,
+  165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 162, 163,
+  166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, 168,
+  169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253,
+  170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254,
+  255,
+};
+
+/* clang-format off */
+static const uint8_t *const orders[BLOCK_SIZES] = {
+  //                              4X4
+                                  orders_8x8,
+  // 4X8,         8X4,            8X8
+  orders_8x8,     orders_8x8,     orders_8x8,
+  // 8X16,        16X8,           16X16
+  orders_8x16,    orders_16x8,    orders_16x16,
+  // 16X32,       32X16,          32X32
+  orders_16x32,   orders_32x16,   orders_32x32,
+  // 32X64,       64X32,          64X64
+  orders_32x64,   orders_64x32,   orders_64x64,
+  // 64x128,      128x64,         128x128
+  orders_64x128,  orders_128x64,  orders_128x128
+};
+/* clang-format on */
+#else
+/* clang-format off */
+static const uint8_t *const orders[BLOCK_SIZES] = {
+  //                              4X4
+                                  orders_16x16,
+  // 4X8,         8X4,            8X8
+  orders_16x16,   orders_16x16,   orders_16x16,
+  // 8X16,        16X8,           16X16
+  orders_16x32,   orders_32x16,   orders_32x32,
+  // 16X32,       32X16,          32X32
+  orders_32x64,   orders_64x32,   orders_64x64,
+  // 32X64,       64X32,          64X64
+  orders_64x128,  orders_128x64,  orders_128x128
+};
+/* clang-format on */
+#endif  // CONFIG_EXT_PARTITION
+
+#if CONFIG_EXT_PARTITION_TYPES
+static const uint8_t orders_verta_64x64[4] = {
+  0, 2, 1, 2,
+};
+static const uint8_t orders_verta_32x32[16] = {
+  0, 2, 4, 6, 1, 2, 5, 6, 8, 10, 12, 14, 9, 10, 13, 14,
+};
+static const uint8_t orders_verta_16x16[64] = {
+  0,  2,  4,  6,  16, 18, 20, 22, 1,  2,  5,  6,  17, 18, 21, 22,
+  8,  10, 12, 14, 24, 26, 28, 30, 9,  10, 13, 14, 25, 26, 29, 30,
+  32, 34, 36, 38, 48, 50, 52, 54, 33, 34, 37, 38, 49, 50, 53, 54,
+  40, 42, 44, 46, 56, 58, 60, 62, 41, 42, 45, 46, 57, 58, 61, 62,
+};
+#if CONFIG_EXT_PARTITION
+static const uint8_t orders_verta_8x8[256] = {
+  0,   2,   4,   6,   16,  18,  20,  22,  64,  66,  68,  70,  80,  82,  84,
+  86,  1,   2,   5,   6,   17,  18,  21,  22,  65,  66,  69,  70,  81,  82,
+  85,  86,  8,   10,  12,  14,  24,  26,  28,  30,  72,  74,  76,  78,  88,
+  90,  92,  94,  9,   10,  13,  14,  25,  26,  29,  30,  73,  74,  77,  78,
+  89,  90,  93,  94,  32,  34,  36,  38,  48,  50,  52,  54,  96,  98,  100,
+  102, 112, 114, 116, 118, 33,  34,  37,  38,  49,  50,  53,  54,  97,  98,
+  101, 102, 113, 114, 117, 118, 40,  42,  44,  46,  56,  58,  60,  62,  104,
+  106, 108, 110, 120, 122, 124, 126, 41,  42,  45,  46,  57,  58,  61,  62,
+  105, 106, 109, 110, 121, 122, 125, 126, 128, 130, 132, 134, 144, 146, 148,
+  150, 192, 194, 196, 198, 208, 210, 212, 214, 129, 130, 133, 134, 145, 146,
+  149, 150, 193, 194, 197, 198, 209, 210, 213, 214, 136, 138, 140, 142, 152,
+  154, 156, 158, 200, 202, 204, 206, 216, 218, 220, 222, 137, 138, 141, 142,
+  153, 154, 157, 158, 201, 202, 205, 206, 217, 218, 221, 222, 160, 162, 164,
+  166, 176, 178, 180, 182, 224, 226, 228, 230, 240, 242, 244, 246, 161, 162,
+  165, 166, 177, 178, 181, 182, 225, 226, 229, 230, 241, 242, 245, 246, 168,
+  170, 172, 174, 184, 186, 188, 190, 232, 234, 236, 238, 248, 250, 252, 254,
+  169, 170, 173, 174, 185, 186, 189, 190, 233, 234, 237, 238, 249, 250, 253,
+  254,
+};
+
+/* clang-format off */
+static const uint8_t *const orders_verta[BLOCK_SIZES] = {
+  //                                  4X4
+                                      orders_verta_8x8,
+  // 4X8,           8X4,              8X8
+  orders_verta_8x8, orders_verta_8x8, orders_verta_8x8,
+  // 8X16,          16X8,             16X16
+  orders_8x16,      orders_16x8,      orders_verta_16x16,
+  // 16X32,         32X16,            32X32
+  orders_16x32,     orders_32x16,     orders_verta_32x32,
+  // 32X64,         64X32,            64X64
+  orders_32x64,     orders_64x32,     orders_verta_64x64,
+  // 64x128,        128x64,           128x128
+  orders_64x128,    orders_128x64,    orders_128x128
+};
+/* clang-format on */
+#else
+/* clang-format off */
+static const uint8_t *const orders_verta[BLOCK_SIZES] = {
+  //                                      4X4
+                                          orders_verta_16x16,
+  // 4X8,             8X4,                8X8
+  orders_verta_16x16, orders_verta_16x16, orders_verta_16x16,
+  // 8X16,            16X8,               16X16
+  orders_16x32,       orders_32x16,       orders_verta_32x32,
+  // 16X32,           32X16,              32X32
+  orders_32x64,       orders_64x32,       orders_verta_64x64,
+  // 32X64,           64X32,              64X64
+  orders_64x128,      orders_128x64,      orders_128x128
+};
+/* clang-format on */
+#endif  // CONFIG_EXT_PARTITION
+#endif  // CONFIG_EXT_PARTITION_TYPES
+
+static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
+                          int right_available,
+#if CONFIG_EXT_PARTITION_TYPES
+                          PARTITION_TYPE partition,
+#endif
+                          TX_SIZE txsz, int y, int x, int ss_x) {
+  const int wl = mi_width_log2_lookup[bsize];
+  const int w = VPXMAX(num_4x4_blocks_wide_lookup[bsize] >> ss_x, 1);
+  const int step = 1 << txsz;
+
+  if (!right_available) {
+    return 0;
+  } else {
+    // Handle block sizes 4x8 and 4x4
+    if (ss_x == 0 && num_4x4_blocks_wide_lookup[bsize] < 2 && x == 0) return 1;
+
+    if (y == 0) {
+      const int hl = mi_height_log2_lookup[bsize];
+      const uint8_t *order;
+      int my_order, tr_order;
+#if CONFIG_EXT_PARTITION_TYPES
+      if (partition == PARTITION_VERT_A)
+        order = orders_verta[bsize];
+      else
+#endif  // CONFIG_EXT_PARTITION_TYPES
+        order = orders[bsize];
+
+      if (x + step < w) return 1;
+
+      mi_row = (mi_row & MAX_MIB_MASK) >> hl;
+      mi_col = (mi_col & MAX_MIB_MASK) >> wl;
+
+      // If top row of coding unit
+      if (mi_row == 0) return 1;
+
+      // If rightmost column of coding unit
+      if (((mi_col + 1) << wl) >= MAX_MIB_SIZE) return 0;
+
+      my_order = order[((mi_row + 0) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 0];
+      tr_order = order[((mi_row - 1) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 1];
+
+      return my_order > tr_order;
+    } else {
+      return x + step < w;
+    }
+  }
+}
+
+static int vp10_has_bottom(BLOCK_SIZE bsize, int mi_row, int mi_col,
+                           int bottom_available, TX_SIZE txsz, int y, int x,
+                           int ss_y) {
+  if (!bottom_available || x != 0) {
+    return 0;
+  } else {
+    const int wl = mi_width_log2_lookup[bsize];
+    const int hl = mi_height_log2_lookup[bsize];
+    const int h = 1 << (hl + 1 - ss_y);
+    const int step = 1 << txsz;
+    const uint8_t *order = orders[bsize];
+    int my_order, bl_order;
+
+    // Handle block sizes 8x4 and 4x4
+    if (ss_y == 0 && num_4x4_blocks_high_lookup[bsize] < 2 && y == 0) return 1;
+
+    if (y + step < h) return 1;
+
+    mi_row = (mi_row & MAX_MIB_MASK) >> hl;
+    mi_col = (mi_col & MAX_MIB_MASK) >> wl;
+
+    if (mi_col == 0)
+      return (mi_row << (hl + !ss_y)) + y + step < (MAX_MIB_SIZE << !ss_y);
+
+    if (((mi_row + 1) << hl) >= MAX_MIB_SIZE) return 0;
+
+    my_order = order[((mi_row + 0) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 0];
+    bl_order = order[((mi_row + 1) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col - 1];
+
+    return bl_order < my_order;
+  }
+}
+
+typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
+                              const uint8_t *above, const uint8_t *left);
+
+static intra_pred_fn pred[INTRA_MODES][TX_SIZES];
+static intra_pred_fn dc_pred[2][2][TX_SIZES];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
+                                   const uint16_t *above, const uint16_t *left,
+                                   int bd);
+static intra_high_pred_fn pred_high[INTRA_MODES][4];
+static intra_high_pred_fn dc_pred_high[2][2][4];
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+static void vp10_init_intra_predictors_internal(void) {
+#define INIT_NO_4X4(p, type)                  \
+  p[TX_8X8] = vpx_##type##_predictor_8x8;     \
+  p[TX_16X16] = vpx_##type##_predictor_16x16; \
+  p[TX_32X32] = vpx_##type##_predictor_32x32
+
+#define INIT_ALL_SIZES(p, type)           \
+  p[TX_4X4] = vpx_##type##_predictor_4x4; \
+  INIT_NO_4X4(p, type)
+
+  INIT_ALL_SIZES(pred[V_PRED], v);
+  INIT_ALL_SIZES(pred[H_PRED], h);
+  INIT_ALL_SIZES(pred[D207_PRED], d207e);
+  INIT_ALL_SIZES(pred[D45_PRED], d45e);
+  INIT_ALL_SIZES(pred[D63_PRED], d63e);
+  INIT_ALL_SIZES(pred[D117_PRED], d117);
+  INIT_ALL_SIZES(pred[D135_PRED], d135);
+  INIT_ALL_SIZES(pred[D153_PRED], d153);
+  INIT_ALL_SIZES(pred[TM_PRED], tm);
+
+  INIT_ALL_SIZES(dc_pred[0][0], dc_128);
+  INIT_ALL_SIZES(dc_pred[0][1], dc_top);
+  INIT_ALL_SIZES(dc_pred[1][0], dc_left);
+  INIT_ALL_SIZES(dc_pred[1][1], dc);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
+  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
+  INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207e);
+  INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45e);
+  INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63e);
+  INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117);
+  INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135);
+  INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153);
+  INIT_ALL_SIZES(pred_high[TM_PRED], highbd_tm);
+
+  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
+  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
+  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
+  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#undef INIT_ALL_SIZES
+#undef INIT_NO_4X4
+}
+
+#if CONFIG_EXT_INTRA
+
+static const uint8_t ext_intra_extend_modes[FILTER_INTRA_MODES] = {
+  NEED_LEFT | NEED_ABOVE,  // FILTER_DC
+  NEED_LEFT | NEED_ABOVE,  // FILTER_V
+  NEED_LEFT | NEED_ABOVE,  // FILTER_H
+  NEED_LEFT | NEED_ABOVE,  // FILTER_D45
+  NEED_LEFT | NEED_ABOVE,  // FILTER_D135
+  NEED_LEFT | NEED_ABOVE,  // FILTER_D117
+  NEED_LEFT | NEED_ABOVE,  // FILTER_D153
+  NEED_LEFT | NEED_ABOVE,  // FILTER_D207
+  NEED_LEFT | NEED_ABOVE,  // FILTER_D63
+  NEED_LEFT | NEED_ABOVE,  // FILTER_TM
+};
+
+static int intra_subpel_interp(int base, int shift, const uint8_t *ref,
+                               int ref_start_idx, int ref_end_idx,
+                               INTRA_FILTER filter_type) {
+  int val, k, idx, filter_idx = 0;
+  const int16_t *filter = NULL;
+
+  if (filter_type == INTRA_FILTER_LINEAR) {
+    val = ref[base] * (256 - shift) + ref[base + 1] * shift;
+    val = ROUND_POWER_OF_TWO(val, 8);
+  } else {
+    filter_idx = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
+    filter = vp10_intra_filter_kernels[filter_type][filter_idx];
+
+    if (filter_idx < (1 << SUBPEL_BITS)) {
+      val = 0;
+      for (k = 0; k < SUBPEL_TAPS; ++k) {
+        idx = base + 1 - (SUBPEL_TAPS / 2) + k;
+        idx = VPXMAX(VPXMIN(idx, ref_end_idx), ref_start_idx);
+        val += ref[idx] * filter[k];
+      }
+      val = ROUND_POWER_OF_TWO(val, FILTER_BITS);
+    } else {
+      val = ref[base + 1];
+    }
+  }
+
+  return val;
+}
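
The linear branch above is an 8-bit fixed-point interpolation between two
neighbouring reference pixels, with ROUND_POWER_OF_TWO(v, 8) written out as
(v + 128) >> 8. A standalone sketch:

#include <assert.h>

/* shift == 0 returns a, shift == 256 would return b; 128 is the midpoint. */
static int lerp256(int a, int b, int shift) {
  return (a * (256 - shift) + b * shift + 128) >> 8;
}

int main(void) {
  assert(lerp256(10, 20, 0) == 10);
  assert(lerp256(10, 20, 128) == 15);
  assert(lerp256(10, 20, 255) == 20);
  return 0;
}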
+
+// Directional prediction, zone 1: 0 < angle < 90
+static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs,
+                             const uint8_t *above, const uint8_t *left, int dx,
+                             int dy, INTRA_FILTER filter_type) {
+  int r, c, x, base, shift, val;
+
+  (void)left;
+  (void)dy;
+  assert(dy == 1);
+  assert(dx < 0);
+
+  if (filter_type != INTRA_FILTER_LINEAR) {
+    const int pad_size = SUBPEL_TAPS >> 1;
+    int len;
+    DECLARE_ALIGNED(16, uint8_t, buf[SUBPEL_SHIFTS][MAX_SB_SIZE]);
+    DECLARE_ALIGNED(16, uint8_t, src[MAX_SB_SIZE + SUBPEL_TAPS]);
+    uint8_t flags[SUBPEL_SHIFTS];
+
+    memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0]));
+    memset(src, above[0], pad_size * sizeof(above[0]));
+    memcpy(src + pad_size, above, 2 * bs * sizeof(above[0]));
+    memset(src + pad_size + 2 * bs, above[2 * bs - 1],
+           pad_size * sizeof(above[0]));
+    flags[0] = 1;
+    x = -dx;
+    for (r = 0; r < bs; ++r, dst += stride, x -= dx) {
+      base = x >> 8;
+      shift = x & 0xFF;
+      shift = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
+      if (shift == SUBPEL_SHIFTS) {
+        base += 1;
+        shift = 0;
+      }
+      len = VPXMIN(bs, 2 * bs - 1 - base);
+      if (len <= 0) {
+        int i;
+        for (i = r; i < bs; ++i) {
+          memset(dst, above[2 * bs - 1], bs * sizeof(dst[0]));
+          dst += stride;
+        }
+        return;
+      }
+
+      if (len <= (bs >> 1) && !flags[shift]) {
+        base = x >> 8;
+        shift = x & 0xFF;
+        for (c = 0; c < len; ++c) {
+          val = intra_subpel_interp(base, shift, above, 0, 2 * bs - 1,
+                                    filter_type);
+          dst[c] = clip_pixel(val);
+          ++base;
+        }
+      } else {
+        if (!flags[shift]) {
+          const int16_t *filter = vp10_intra_filter_kernels[filter_type][shift];
+          vpx_convolve8_horiz(src + pad_size, 2 * bs, buf[shift], 2 * bs,
+                              filter, 16, NULL, 16, 2 * bs,
+                              2 * bs < 16 ? 2 : 1);
+          flags[shift] = 1;
+        }
+        memcpy(dst, shift == 0 ? src + pad_size + base : &buf[shift][base],
+               len * sizeof(dst[0]));
+      }
+
+      if (len < bs)
+        memset(dst + len, above[2 * bs - 1], (bs - len) * sizeof(dst[0]));
+    }
+    return;
+  }
+
+  // For linear filter, C code is faster.
+  x = -dx;
+  for (r = 0; r < bs; ++r, dst += stride, x -= dx) {
+    base = x >> 8;
+    shift = x & 0xFF;
+
+    if (base >= 2 * bs - 1) {
+      int i;
+      for (i = r; i < bs; ++i) {
+        memset(dst, above[2 * bs - 1], bs * sizeof(dst[0]));
+        dst += stride;
+      }
+      return;
+    }
+
+    for (c = 0; c < bs; ++c, ++base) {
+      if (base < 2 * bs - 1) {
+        val = above[base] * (256 - shift) + above[base + 1] * shift;
+        val = ROUND_POWER_OF_TWO(val, 8);
+        dst[c] = clip_pixel(val);
+      } else {
+        dst[c] = above[2 * bs - 1];
+      }
+    }
+  }
+}
+
+// Directional prediction, zone 2: 90 < angle < 180
+static void dr_prediction_z2(uint8_t *dst, ptrdiff_t stride, int bs,
+                             const uint8_t *above, const uint8_t *left, int dx,
+                             int dy, INTRA_FILTER filter_type) {
+  int r, c, x, y, shift1, shift2, val, base1, base2;
+
+  assert(dx > 0);
+  assert(dy > 0);
+
+  x = -dx;
+  for (r = 0; r < bs; ++r, x -= dx, dst += stride) {
+    base1 = x >> 8;
+    y = (r << 8) - dy;
+    for (c = 0; c < bs; ++c, ++base1, y -= dy) {
+      if (base1 >= -1) {
+        shift1 = x & 0xFF;
+        val =
+            intra_subpel_interp(base1, shift1, above, -1, bs - 1, filter_type);
+      } else {
+        base2 = y >> 8;
+        if (base2 >= 0) {
+          shift2 = y & 0xFF;
+          val =
+              intra_subpel_interp(base2, shift2, left, 0, bs - 1, filter_type);
+        } else {
+          val = left[0];
+        }
+      }
+      dst[c] = clip_pixel(val);
+    }
+  }
+}
+
+// Directional prediction, zone 3: 180 < angle < 270
+static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs,
+                             const uint8_t *above, const uint8_t *left, int dx,
+                             int dy, INTRA_FILTER filter_type) {
+  int r, c, y, base, shift, val;
+
+  (void)above;
+  (void)dx;
+
+  assert(dx == 1);
+  assert(dy < 0);
+
+  if (filter_type != INTRA_FILTER_LINEAR) {
+    const int pad_size = SUBPEL_TAPS >> 1;
+    int len, i;
+    DECLARE_ALIGNED(16, uint8_t, buf[MAX_SB_SIZE][4 * SUBPEL_SHIFTS]);
+    DECLARE_ALIGNED(16, uint8_t, src[(MAX_SB_SIZE + SUBPEL_TAPS) * 4]);
+    uint8_t flags[SUBPEL_SHIFTS];
+
+    memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0]));
+    for (i = 0; i < pad_size; ++i) src[4 * i] = left[0];
+    for (i = 0; i < 2 * bs; ++i) src[4 * (i + pad_size)] = left[i];
+    for (i = 0; i < pad_size; ++i)
+      src[4 * (i + 2 * bs + pad_size)] = left[2 * bs - 1];
+    flags[0] = 1;
+    y = -dy;
+    for (c = 0; c < bs; ++c, y -= dy) {
+      base = y >> 8;
+      shift = y & 0xFF;
+      shift = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
+      if (shift == SUBPEL_SHIFTS) {
+        base += 1;
+        shift = 0;
+      }
+      len = VPXMIN(bs, 2 * bs - 1 - base);
+
+      if (len <= 0) {
+        for (r = 0; r < bs; ++r) {
+          dst[r * stride + c] = left[2 * bs - 1];
+        }
+        continue;
+      }
+
+      if (len <= (bs >> 1) && !flags[shift]) {
+        base = y >> 8;
+        shift = y & 0xFF;
+        for (r = 0; r < len; ++r) {
+          val = intra_subpel_interp(base, shift, left, 0, 2 * bs - 1,
+                                    filter_type);
+          dst[r * stride + c] = clip_pixel(val);
+          ++base;
+        }
+      } else {
+        if (!flags[shift]) {
+          const int16_t *filter = vp10_intra_filter_kernels[filter_type][shift];
+          vpx_convolve8_vert(src + 4 * pad_size, 4, buf[0] + 4 * shift,
+                             4 * SUBPEL_SHIFTS, NULL, 16, filter, 16, 4,
+                             2 * bs);
+          flags[shift] = 1;
+        }
+
+        if (shift == 0) {
+          for (r = 0; r < len; ++r) {
+            dst[r * stride + c] = left[r + base];
+          }
+        } else {
+          for (r = 0; r < len; ++r) {
+            dst[r * stride + c] = buf[r + base][4 * shift];
+          }
+        }
+      }
+
+      if (len < bs) {
+        for (r = len; r < bs; ++r) {
+          dst[r * stride + c] = left[2 * bs - 1];
+        }
+      }
+    }
+    return;
+  }
+
+  // For linear filter, C code is faster.
+  y = -dy;
+  for (c = 0; c < bs; ++c, y -= dy) {
+    base = y >> 8;
+    shift = y & 0xFF;
+
+    for (r = 0; r < bs; ++r, ++base) {
+      if (base < 2 * bs - 1) {
+        val = left[base] * (256 - shift) + left[base + 1] * shift;
+        val = ROUND_POWER_OF_TWO(val, 8);
+        dst[r * stride + c] = clip_pixel(val);
+      } else {
+        for (; r < bs; ++r) dst[r * stride + c] = left[2 * bs - 1];
+        break;
+      }
+    }
+  }
+}
+
+// Get the shift (up-scaled by 256) in X w.r.t. a unit change in Y.
+// If angle > 0 && angle < 90, dx = -((int)(256 / t));
+// If angle > 90 && angle < 180, dx = (int)(256 / t);
+// If angle > 180 && angle < 270, dx = 1;
+static inline int get_dx(int angle) {
+  if (angle > 0 && angle < 90) {
+    return -dr_intra_derivative[angle];
+  } else if (angle > 90 && angle < 180) {
+    return dr_intra_derivative[180 - angle];
+  } else {
+    // dx is not used in this case; any value may be returned.
+    return 1;
+  }
+}
+
+// Get the shift (up-scaled by 256) in Y w.r.t. a unit change in X.
+// If angle > 0 && angle < 90, dy = 1;
+// If angle > 90 && angle < 180, dy = (int)(256 * t);
+// If angle > 180 && angle < 270, dy = -((int)(256 * t));
+static inline int get_dy(int angle) {
+  if (angle > 90 && angle < 180) {
+    return dr_intra_derivative[angle - 90];
+  } else if (angle > 180 && angle < 270) {
+    return -dr_intra_derivative[270 - angle];
+  } else {
+    // dy is not used in this case; any value may be returned.
+    return 1;
+  }
+}
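
Per the comments, the derivative table approximates 256/tan(angle) for dx
and 256*tan(angle) for dy. A floating-point sketch of the zone-1
relationship, not the codec's actual table:

#include <math.h>
#include <stdio.h>

static const double kPi = 3.14159265358979323846;

/* Zone 1 (0 < angle < 90): step one row down and 256/tan(angle) subpel
 * units along the above row, so dy == 1 and dx is negative. */
static int approx_dx_zone1(int angle_deg) {
  return -(int)(256.0 / tan(angle_deg * kPi / 180.0));
}

int main(void) {
  printf("angle 45 -> dx %d, dy 1\n", approx_dx_zone1(45)); /* dx == -256 */
  return 0;
}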
+
+static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
+                         const uint8_t *above, const uint8_t *left, int angle,
+                         INTRA_FILTER filter_type) {
+  const int dx = get_dx(angle);
+  const int dy = get_dy(angle);
+  const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+  assert(angle > 0 && angle < 270);
+
+  if (angle > 0 && angle < 90) {
+    dr_prediction_z1(dst, stride, bs, above, left, dx, dy, filter_type);
+  } else if (angle > 90 && angle < 180) {
+    dr_prediction_z2(dst, stride, bs, above, left, dx, dy, filter_type);
+  } else if (angle > 180 && angle < 270) {
+    dr_prediction_z3(dst, stride, bs, above, left, dx, dy, filter_type);
+  } else if (angle == 90) {
+    pred[V_PRED][tx_size](dst, stride, above, left);
+  } else if (angle == 180) {
+    pred[H_PRED][tx_size](dst, stride, above, left);
+  }
+}
+
+static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride, int bs,
+                                         const uint8_t *above,
+                                         const uint8_t *left, int mode) {
+  int k, r, c;
+  int pred[33][65];
+  int mean, ipred;
+  const TX_SIZE tx_size =
+      (bs == 32) ? TX_32X32
+                 : ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4)));
+  const int c0 = filter_intra_taps_4[tx_size][mode][0];
+  const int c1 = filter_intra_taps_4[tx_size][mode][1];
+  const int c2 = filter_intra_taps_4[tx_size][mode][2];
+  const int c3 = filter_intra_taps_4[tx_size][mode][3];
+
+  k = 0;
+  mean = 0;
+  while (k < bs) {
+    mean = mean + (int)left[k];
+    mean = mean + (int)above[k];
+    k++;
+  }
+  mean = (mean + bs) / (2 * bs);
+
+  for (r = 0; r < bs; ++r) pred[r + 1][0] = (int)left[r] - mean;
+
+  for (c = 0; c < 2 * bs + 1; ++c) pred[0][c] = (int)above[c - 1] - mean;
+
+  for (r = 1; r < bs + 1; ++r)
+    for (c = 1; c < 2 * bs + 1 - r; ++c) {
+      ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
+              c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
+      pred[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
+    }
+
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      ipred = pred[r + 1][c + 1] + mean;
+      dst[c] = clip_pixel(ipred);
+    }
+    dst += stride;
+  }
+}
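+
+// In scalar form, the recursion above computes, over the mean-removed
+// working array p (p[0][*] seeded from the above row, p[*][0] from the
+// left column, with mode- and size-dependent taps c0..c3):
+//   p[r][c] = ROUND_POWER_OF_TWO_SIGNED(c0 * p[r-1][c] + c1 * p[r][c-1] +
+//                                       c2 * p[r-1][c-1] + c3 * p[r-1][c+1],
+//                                       FILTER_INTRA_PREC_BITS)
+// and the output pixel at (r, c) is clip_pixel(p[r+1][c+1] + mean).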
+
+void vp10_dc_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+                                const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED);
+}
+
+void vp10_v_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+                               const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED);
+}
+
+void vp10_h_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+                               const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED);
+}
+
+void vp10_d45_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+                                 const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED);
+}
+
+void vp10_d135_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED);
+}
+
+void vp10_d117_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED);
+}
+
+void vp10_d153_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED);
+}
+
+void vp10_d207_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED);
+}
+
+void vp10_d63_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+                                 const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED);
+}
+
+void vp10_tm_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+                                const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED);
+}
+
+static void filter_intra_predictors(int mode, uint8_t *dst, ptrdiff_t stride,
+                                    int bs, const uint8_t *above,
+                                    const uint8_t *left) {
+  switch (mode) {
+    case DC_PRED: vp10_dc_filter_predictor(dst, stride, bs, above, left); break;
+    case V_PRED: vp10_v_filter_predictor(dst, stride, bs, above, left); break;
+    case H_PRED: vp10_h_filter_predictor(dst, stride, bs, above, left); break;
+    case D45_PRED:
+      vp10_d45_filter_predictor(dst, stride, bs, above, left);
+      break;
+    case D135_PRED:
+      vp10_d135_filter_predictor(dst, stride, bs, above, left);
+      break;
+    case D117_PRED:
+      vp10_d117_filter_predictor(dst, stride, bs, above, left);
+      break;
+    case D153_PRED:
+      vp10_d153_filter_predictor(dst, stride, bs, above, left);
+      break;
+    case D207_PRED:
+      vp10_d207_filter_predictor(dst, stride, bs, above, left);
+      break;
+    case D63_PRED:
+      vp10_d63_filter_predictor(dst, stride, bs, above, left);
+      break;
+    case TM_PRED: vp10_tm_filter_predictor(dst, stride, bs, above, left); break;
+    default: assert(0);
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static int highbd_intra_subpel_interp(int base, int shift, const uint16_t *ref,
+                                      int ref_start_idx, int ref_end_idx,
+                                      INTRA_FILTER filter_type) {
+  int val, k, idx, filter_idx = 0;
+  const int16_t *filter = NULL;
+
+  if (filter_type == INTRA_FILTER_LINEAR) {
+    val = ref[base] * (256 - shift) + ref[base + 1] * shift;
+    val = ROUND_POWER_OF_TWO(val, 8);
+  } else {
+    filter_idx = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
+    filter = vp10_intra_filter_kernels[filter_type][filter_idx];
+
+    if (filter_idx < (1 << SUBPEL_BITS)) {
+      val = 0;
+      for (k = 0; k < SUBPEL_TAPS; ++k) {
+        idx = base + 1 - (SUBPEL_TAPS / 2) + k;
+        idx = VPXMAX(VPXMIN(idx, ref_end_idx), ref_start_idx);
+        val += ref[idx] * filter[k];
+      }
+      val = ROUND_POWER_OF_TWO(val, FILTER_BITS);
+    } else {
+      val = ref[base + 1];
+    }
+  }
+
+  return val;
+}
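+
+// For example, with INTRA_FILTER_LINEAR and shift = 64 (a quarter-pel
+// offset), the interpolated value is
+//   (192 * ref[base] + 64 * ref[base + 1] + 128) >> 8,
+// a 3:1 blend of the two neighboring reference pixels. The non-linear
+// branch instead maps shift to a subpel filter phase and applies a
+// SUBPEL_TAPS-tap convolution (falling back to ref[base + 1] for the
+// final phase), clamping tap indices to [ref_start_idx, ref_end_idx]
+// at the edges of the reference array.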
+
+// Directional prediction, zone 1: 0 < angle < 90
+static void highbd_dr_prediction_z1(uint16_t *dst, ptrdiff_t stride, int bs,
+                                    const uint16_t *above, const uint16_t *left,
+                                    int dx, int dy, int bd,
+                                    INTRA_FILTER filter_type) {
+  int r, c, x, y, base, shift, val;
+
+  (void)left;
+  (void)dy;
+  assert(dy == 1);
+  assert(dx < 0);
+
+  for (r = 0; r < bs; ++r) {
+    y = r + 1;
+    for (c = 0; c < bs; ++c) {
+      x = (c << 8) - y * dx;
+      base = x >> 8;
+      shift = x - (base << 8);
+      if (base < 2 * bs - 1) {
+        val = highbd_intra_subpel_interp(base, shift, above, 0, 2 * bs - 1,
+                                         filter_type);
+        dst[c] = clip_pixel_highbd(val, bd);
+      } else {
+        dst[c] = above[2 * bs - 1];
+      }
+    }
+    dst += stride;
+  }
+}
+
+// Directional prediction, zone 2: 90 < angle < 180
+static void highbd_dr_prediction_z2(uint16_t *dst, ptrdiff_t stride, int bs,
+                                    const uint16_t *above, const uint16_t *left,
+                                    int dx, int dy, int bd,
+                                    INTRA_FILTER filter_type) {
+  int r, c, x, y, shift, val, base;
+
+  assert(dx > 0);
+  assert(dy > 0);
+
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      y = r + 1;
+      x = (c << 8) - y * dx;
+      base = x >> 8;
+      if (base >= -1) {
+        shift = x - (base << 8);
+        val = highbd_intra_subpel_interp(base, shift, above, -1, bs - 1,
+                                         filter_type);
+      } else {
+        x = c + 1;
+        y = (r << 8) - x * dy;
+        base = y >> 8;
+        if (base >= 0) {
+          shift = y - (base << 8);
+          val = highbd_intra_subpel_interp(base, shift, left, 0, bs - 1,
+                                           filter_type);
+        } else {
+          val = left[0];
+        }
+      }
+      dst[c] = clip_pixel_highbd(val, bd);
+    }
+    dst += stride;
+  }
+}
+
+// Directional prediction, zone 3: 180 < angle < 270
+static void highbd_dr_prediction_z3(uint16_t *dst, ptrdiff_t stride, int bs,
+                                    const uint16_t *above, const uint16_t *left,
+                                    int dx, int dy, int bd,
+                                    INTRA_FILTER filter_type) {
+  int r, c, x, y, base, shift, val;
+
+  (void)above;
+  (void)dx;
+  assert(dx == 1);
+  assert(dy < 0);
+
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      x = c + 1;
+      y = (r << 8) - x * dy;
+      base = y >> 8;
+      shift = y - (base << 8);
+      if (base < 2 * bs - 1) {
+        val = highbd_intra_subpel_interp(base, shift, left, 0, 2 * bs - 1,
+                                         filter_type);
+        dst[c] = clip_pixel_highbd(val, bd);
+      } else {
+        dst[c] = left[2 * bs - 1];
+      }
+    }
+    dst += stride;
+  }
+}
+
+static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                      const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  int r;
+  (void)left;
+  (void)bd;
+  for (r = 0; r < bs; r++) {
+    memcpy(dst, above, bs * sizeof(uint16_t));
+    dst += stride;
+  }
+}
+
+static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                      const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  int r;
+  (void)above;
+  (void)bd;
+  for (r = 0; r < bs; r++) {
+    vpx_memset16(dst, left[r], bs);
+    dst += stride;
+  }
+}
+
+static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                const uint16_t *above, const uint16_t *left,
+                                int angle, int bd, INTRA_FILTER filter) {
+  const int dx = get_dx(angle);
+  const int dy = get_dy(angle);
+  assert(angle > 0 && angle < 270);
+
+  if (angle > 0 && angle < 90) {
+    highbd_dr_prediction_z1(dst, stride, bs, above, left, dx, dy, bd, filter);
+  } else if (angle > 90 && angle < 180) {
+    highbd_dr_prediction_z2(dst, stride, bs, above, left, dx, dy, bd, filter);
+  } else if (angle > 180 && angle < 270) {
+    highbd_dr_prediction_z3(dst, stride, bs, above, left, dx, dy, bd, filter);
+  } else if (angle == 90) {
+    highbd_v_predictor(dst, stride, bs, above, left, bd);
+  } else if (angle == 180) {
+    highbd_h_predictor(dst, stride, bs, above, left, bd);
+  }
+}
+
+static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride,
+                                                int bs, const uint16_t *above,
+                                                const uint16_t *left, int mode,
+                                                int bd) {
+  int k, r, c;
+  int pred[33][65];
+  int mean, ipred;
+  const TX_SIZE tx_size =
+      (bs == 32) ? TX_32X32
+                 : ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4)));
+  const int c0 = filter_intra_taps_4[tx_size][mode][0];
+  const int c1 = filter_intra_taps_4[tx_size][mode][1];
+  const int c2 = filter_intra_taps_4[tx_size][mode][2];
+  const int c3 = filter_intra_taps_4[tx_size][mode][3];
+
+  k = 0;
+  mean = 0;
+  while (k < bs) {
+    mean = mean + (int)left[k];
+    mean = mean + (int)above[k];
+    k++;
+  }
+  mean = (mean + bs) / (2 * bs);
+
+  for (r = 0; r < bs; ++r) pred[r + 1][0] = (int)left[r] - mean;
+
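+  // Note that above[-1], the top-left corner pixel, is read when c == 0.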
+  for (c = 0; c < 2 * bs + 1; ++c) pred[0][c] = (int)above[c - 1] - mean;
+
+  for (r = 1; r < bs + 1; ++r)
+    for (c = 1; c < 2 * bs + 1 - r; ++c) {
+      ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
+              c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
+      pred[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
+    }
+
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      ipred = pred[r + 1][c + 1] + mean;
+      dst[c] = clip_pixel_highbd(ipred, bd);
+    }
+    dst += stride;
+  }
+}
+
+void vp10_highbd_dc_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+                                       const uint16_t *above,
+                                       const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED,
+                                      bd);
+}
+
+void vp10_highbd_v_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+                                      const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED, bd);
+}
+
+void vp10_highbd_h_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+                                      const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED, bd);
+}
+
+void vp10_highbd_d45_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+                                        const uint16_t *above,
+                                        const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED,
+                                      bd);
+}
+
+void vp10_highbd_d135_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
+                                         int bs, const uint16_t *above,
+                                         const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED,
+                                      bd);
+}
+
+void vp10_highbd_d117_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
+                                         int bs, const uint16_t *above,
+                                         const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED,
+                                      bd);
+}
+
+void vp10_highbd_d153_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
+                                         int bs, const uint16_t *above,
+                                         const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED,
+                                      bd);
+}
+
+void vp10_highbd_d207_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
+                                         int bs, const uint16_t *above,
+                                         const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED,
+                                      bd);
+}
+
+void vp10_highbd_d63_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+                                        const uint16_t *above,
+                                        const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED,
+                                      bd);
+}
+
+void vp10_highbd_tm_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+                                       const uint16_t *above,
+                                       const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED,
+                                      bd);
+}
+
+static void highbd_filter_intra_predictors(int mode, uint16_t *dst,
+                                           ptrdiff_t stride, int bs,
+                                           const uint16_t *above,
+                                           const uint16_t *left, int bd) {
+  switch (mode) {
+    case DC_PRED:
+      vp10_highbd_dc_filter_predictor(dst, stride, bs, above, left, bd);
+      break;
+    case V_PRED:
+      vp10_highbd_v_filter_predictor(dst, stride, bs, above, left, bd);
+      break;
+    case H_PRED:
+      vp10_highbd_h_filter_predictor(dst, stride, bs, above, left, bd);
+      break;
+    case D45_PRED:
+      vp10_highbd_d45_filter_predictor(dst, stride, bs, above, left, bd);
+      break;
+    case D135_PRED:
+      vp10_highbd_d135_filter_predictor(dst, stride, bs, above, left, bd);
+      break;
+    case D117_PRED:
+      vp10_highbd_d117_filter_predictor(dst, stride, bs, above, left, bd);
+      break;
+    case D153_PRED:
+      vp10_highbd_d153_filter_predictor(dst, stride, bs, above, left, bd);
+      break;
+    case D207_PRED:
+      vp10_highbd_d207_filter_predictor(dst, stride, bs, above, left, bd);
+      break;
+    case D63_PRED:
+      vp10_highbd_d63_filter_predictor(dst, stride, bs, above, left, bd);
+      break;
+    case TM_PRED:
+      vp10_highbd_tm_filter_predictor(dst, stride, bs, above, left, bd);
+      break;
+    default: assert(0);
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_EXT_INTRA
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void build_intra_predictors_high(
+    const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
+    int dst_stride, PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px,
+    int n_topright_px, int n_left_px, int n_bottomleft_px, int plane) {
+  int i;
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  DECLARE_ALIGNED(16, uint16_t, left_col[MAX_SB_SIZE]);
+  DECLARE_ALIGNED(16, uint16_t, above_data[MAX_SB_SIZE + 16]);
+  uint16_t *above_row = above_data + 16;
+  const uint16_t *const_above_row = above_row;
+  const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+  int need_left = extend_modes[mode] & NEED_LEFT;
+  int need_above = extend_modes[mode] & NEED_ABOVE;
+  const uint16_t *above_ref = ref - ref_stride;
+  int base = 128 << (xd->bd - 8);
+// Border fill pattern, shown here for 8 bit (base == 128); the
+// high-bitdepth code below uses base - 1 in place of 127 and base + 1 in
+// place of 129:
+// 127 127 127 .. 127 127 127 127 127 127
+// 129  A   B  ..  Y   Z
+// 129  C   D  ..  W   X
+// 129  E   F  ..  U   V
+// 129  G   H  ..  S   T   T   T   T   T
+
+#if CONFIG_EXT_INTRA
+  const EXT_INTRA_MODE_INFO *ext_intra_mode_info =
+      &xd->mi[0]->mbmi.ext_intra_mode_info;
+  const EXT_INTRA_MODE ext_intra_mode =
+      ext_intra_mode_info->ext_intra_mode[plane != 0];
+  int p_angle = 0;
+
+  if (mode != DC_PRED && mode != TM_PRED &&
+      xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+    p_angle = mode_to_angle_map[mode] +
+              xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+    if (p_angle <= 90)
+      need_above = 1, need_left = 0;
+    else if (p_angle < 180)
+      need_above = 1, need_left = 1;
+    else
+      need_above = 0, need_left = 1;
+  }
+
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+    EXT_INTRA_MODE ext_intra_mode =
+        ext_intra_mode_info->ext_intra_mode[plane != 0];
+    need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+    need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+  }
+#endif  // CONFIG_EXT_INTRA
+
+  (void)plane;
+  assert(n_top_px >= 0);
+  assert(n_topright_px >= 0);
+  assert(n_left_px >= 0);
+  assert(n_bottomleft_px >= 0);
+
+  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
+    int i;
+    const int val = (n_left_px == 0) ? base + 1 : base - 1;
+    for (i = 0; i < bs; ++i) {
+      vpx_memset16(dst, val, bs);
+      dst += dst_stride;
+    }
+    return;
+  }
+
+  // NEED_LEFT
+  if (need_left) {
+#if CONFIG_EXT_INTRA
+    int need_bottom;
+    if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+      need_bottom = 0;
+    } else if (mode != DC_PRED && mode != TM_PRED &&
+               xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+      need_bottom = p_angle > 180;
+    } else {
+      need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+    }
+#else
+    const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+#endif  // CONFIG_EXT_INTRA
+    i = 0;
+    if (n_left_px > 0) {
+      for (; i < n_left_px; i++) left_col[i] = ref[i * ref_stride - 1];
+      if (need_bottom && n_bottomleft_px > 0) {
+        assert(i == bs);
+        for (; i < bs + n_bottomleft_px; i++)
+          left_col[i] = ref[i * ref_stride - 1];
+      }
+      if (i < (bs << need_bottom))
+        vpx_memset16(&left_col[i], left_col[i - 1], (bs << need_bottom) - i);
+    } else {
+      vpx_memset16(left_col, base + 1, bs << need_bottom);
+    }
+  }
+
+  // NEED_ABOVE
+  if (need_above) {
+#if CONFIG_EXT_INTRA
+    int need_right;
+    if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+      need_right = 1;
+    } else if (mode != DC_PRED && mode != TM_PRED &&
+               xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+      need_right = p_angle < 90;
+    } else {
+      need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+    }
+#else
+    const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+#endif  // CONFIG_EXT_INTRA
+    if (n_top_px > 0) {
+      memcpy(above_row, above_ref, n_top_px * sizeof(uint16_t));
+      i = n_top_px;
+      if (need_right && n_topright_px > 0) {
+        assert(n_top_px == bs);
+        memcpy(above_row + bs, above_ref + bs,
+               n_topright_px * sizeof(uint16_t));
+        i += n_topright_px;
+      }
+      if (i < (bs << need_right))
+        vpx_memset16(&above_row[i], above_row[i - 1], (bs << need_right) - i);
+    } else {
+      vpx_memset16(above_row, base - 1, bs << need_right);
+    }
+  }
+
+#if CONFIG_EXT_INTRA
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
+      (extend_modes[mode] & NEED_ABOVELEFT) ||
+      (mode != DC_PRED && mode != TM_PRED &&
+       xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) {
+    above_row[-1] =
+        n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
+  }
+#else
+  if ((extend_modes[mode] & NEED_ABOVELEFT)) {
+    above_row[-1] =
+        n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
+  }
+#endif  // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTRA
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+    highbd_filter_intra_predictors(ext_intra_mode, dst, dst_stride, bs,
+                                   const_above_row, left_col, xd->bd);
+    return;
+  }
+
+  if (mode != DC_PRED && mode != TM_PRED &&
+      xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+    INTRA_FILTER filter = INTRA_FILTER_LINEAR;
+    if (plane == 0 && vp10_is_intra_filter_switchable(p_angle))
+      filter = xd->mi[0]->mbmi.intra_filter;
+    highbd_dr_predictor(dst, dst_stride, bs, const_above_row, left_col, p_angle,
+                        xd->bd, filter);
+    return;
+  }
+#endif  // CONFIG_EXT_INTRA
+
+  // predict
+  if (mode == DC_PRED) {
+    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
+        dst, dst_stride, const_above_row, left_col, xd->bd);
+  } else {
+    pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col,
+                             xd->bd);
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
+                                   int ref_stride, uint8_t *dst, int dst_stride,
+                                   PREDICTION_MODE mode, TX_SIZE tx_size,
+                                   int n_top_px, int n_topright_px,
+                                   int n_left_px, int n_bottomleft_px,
+                                   int plane) {
+  int i;
+  DECLARE_ALIGNED(16, uint8_t, left_col[MAX_SB_SIZE]);
+  const uint8_t *above_ref = ref - ref_stride;
+  DECLARE_ALIGNED(16, uint8_t, above_data[MAX_SB_SIZE + 16]);
+  uint8_t *above_row = above_data + 16;
+  const uint8_t *const_above_row = above_row;
+  const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+  int need_left = extend_modes[mode] & NEED_LEFT;
+  int need_above = extend_modes[mode] & NEED_ABOVE;
+#if CONFIG_EXT_INTRA
+  const EXT_INTRA_MODE_INFO *ext_intra_mode_info =
+      &xd->mi[0]->mbmi.ext_intra_mode_info;
+  const EXT_INTRA_MODE ext_intra_mode =
+      ext_intra_mode_info->ext_intra_mode[plane != 0];
+  int p_angle = 0;
+
+  if (mode != DC_PRED && mode != TM_PRED &&
+      xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+    p_angle = mode_to_angle_map[mode] +
+              xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+    if (p_angle <= 90)
+      need_above = 1, need_left = 0;
+    else if (p_angle < 180)
+      need_above = 1, need_left = 1;
+    else
+      need_above = 0, need_left = 1;
+  }
+
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+    EXT_INTRA_MODE ext_intra_mode =
+        ext_intra_mode_info->ext_intra_mode[plane != 0];
+    need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+    need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+  }
+#endif  // CONFIG_EXT_INTRA
+
+  // 127 127 127 .. 127 127 127 127 127 127
+  // 129  A   B  ..  Y   Z
+  // 129  C   D  ..  W   X
+  // 129  E   F  ..  U   V
+  // 129  G   H  ..  S   T   T   T   T   T
+  // ..
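+  // When a required edge is entirely missing, the whole block is filled
+  // with that edge's default value: 129 if the left column is unavailable,
+  // 127 otherwise (see the early-out below).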
+
+  (void)xd;
+  (void)plane;
+  assert(n_top_px >= 0);
+  assert(n_topright_px >= 0);
+  assert(n_left_px >= 0);
+  assert(n_bottomleft_px >= 0);
+
+  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
+    int i;
+    const int val = (n_left_px == 0) ? 129 : 127;
+    for (i = 0; i < bs; ++i) {
+      memset(dst, val, bs);
+      dst += dst_stride;
+    }
+    return;
+  }
+
+  // NEED_LEFT
+  if (need_left) {
+#if CONFIG_EXT_INTRA
+    int need_bottom;
+    if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+      need_bottom = 0;
+    } else if (mode != DC_PRED && mode != TM_PRED &&
+               xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+      need_bottom = p_angle > 180;
+    } else {
+      need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+    }
+#else
+    const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+#endif  // CONFIG_EXT_INTRA
+    i = 0;
+    if (n_left_px > 0) {
+      for (; i < n_left_px; i++) left_col[i] = ref[i * ref_stride - 1];
+      if (need_bottom && n_bottomleft_px > 0) {
+        assert(i == bs);
+        for (; i < bs + n_bottomleft_px; i++)
+          left_col[i] = ref[i * ref_stride - 1];
+      }
+      if (i < (bs << need_bottom))
+        memset(&left_col[i], left_col[i - 1], (bs << need_bottom) - i);
+    } else {
+      memset(left_col, 129, bs << need_bottom);
+    }
+  }
+
+  // NEED_ABOVE
+  if (need_above) {
+#if CONFIG_EXT_INTRA
+    int need_right;
+    if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+      need_right = 1;
+    } else if (mode != DC_PRED && mode != TM_PRED &&
+               xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+      need_right = p_angle < 90;
+    } else {
+      need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+    }
+#else
+    const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+#endif  // CONFIG_EXT_INTRA
+    if (n_top_px > 0) {
+      memcpy(above_row, above_ref, n_top_px);
+      i = n_top_px;
+      if (need_right && n_topright_px > 0) {
+        assert(n_top_px == bs);
+        memcpy(above_row + bs, above_ref + bs, n_topright_px);
+        i += n_topright_px;
+      }
+      if (i < (bs << need_right))
+        memset(&above_row[i], above_row[i - 1], (bs << need_right) - i);
+    } else {
+      memset(above_row, 127, bs << need_right);
+    }
+  }
+
+#if CONFIG_EXT_INTRA
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
+      (extend_modes[mode] & NEED_ABOVELEFT) ||
+      (mode != DC_PRED && mode != TM_PRED &&
+       xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) {
+    above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
+  }
+#else
+  if ((extend_modes[mode] & NEED_ABOVELEFT)) {
+    above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
+  }
+#endif  // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTRA
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+    filter_intra_predictors(ext_intra_mode, dst, dst_stride, bs,
+                            const_above_row, left_col);
+    return;
+  }
+
+  if (mode != DC_PRED && mode != TM_PRED &&
+      xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+    INTRA_FILTER filter = INTRA_FILTER_LINEAR;
+    if (plane == 0 && vp10_is_intra_filter_switchable(p_angle))
+      filter = xd->mi[0]->mbmi.intra_filter;
+    dr_predictor(dst, dst_stride, tx_size, const_above_row, left_col, p_angle,
+                 filter);
+    return;
+  }
+#endif  // CONFIG_EXT_INTRA
+
+  // predict
+  if (mode == DC_PRED) {
+    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride,
+                                                  const_above_row, left_col);
+  } else {
+    pred[mode][tx_size](dst, dst_stride, const_above_row, left_col);
+  }
+}
+
+void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
+                              TX_SIZE tx_size, PREDICTION_MODE mode,
+                              const uint8_t *ref, int ref_stride, uint8_t *dst,
+                              int dst_stride, int col_off, int row_off,
+                              int plane) {
+  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int txw = num_4x4_blocks_wide_txsize_lookup[tx_size];
+  const int txh = num_4x4_blocks_high_txsize_lookup[tx_size];
+  const int have_top = row_off || xd->up_available;
+  const int have_left = col_off || xd->left_available;
+  const int x = col_off * 4;
+  const int y = row_off * 4;
+  const int bw = pd->subsampling_x ? 1 << bwl_in : VPXMAX(2, 1 << bwl_in);
+  const int bh = pd->subsampling_y ? 1 << bhl_in : VPXMAX(2, 1 << bhl_in);
+  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
+  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
+  const int wpx = 4 * bw;
+  const int hpx = 4 * bh;
+  const int txwpx = 4 * txw;
+  const int txhpx = 4 * txh;
+  // Distance from the right edge of this prediction block to
+  // the right edge of the frame
+  const int xr =
+      (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + (wpx - x - txwpx);
+  // Distance from the bottom edge of this prediction block to
+  // the bottom edge of the frame
+  const int yd =
+      (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + (hpx - y - txhpx);
+  const int right_available =
+      (mi_col + ((col_off + txw) >> (1 - pd->subsampling_x))) <
+      xd->tile.mi_col_end;
+#if CONFIG_EXT_PARTITION_TYPES
+  const PARTITION_TYPE partition = xd->mi[0]->mbmi.partition;
+#endif
+  const int have_right =
+      vp10_has_right(bsize, mi_row, mi_col, right_available,
+#if CONFIG_EXT_PARTITION_TYPES
+                     partition,
+#endif
+                     tx_size, row_off, col_off, pd->subsampling_x);
+  const int have_bottom =
+      vp10_has_bottom(bsize, mi_row, mi_col, yd > 0, tx_size, row_off, col_off,
+                      pd->subsampling_y);
+
+  if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
+    const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+    const int stride = 4 * (1 << bwl_in);
+    int r, c;
+    uint8_t *map = NULL;
+#if CONFIG_VP9_HIGHBITDEPTH
+    uint16_t *palette = xd->mi[0]->mbmi.palette_mode_info.palette_colors +
+                        plane * PALETTE_MAX_SIZE;
+#else
+    uint8_t *palette = xd->mi[0]->mbmi.palette_mode_info.palette_colors +
+                       plane * PALETTE_MAX_SIZE;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+    map = xd->plane[plane != 0].color_index_map;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
+      for (r = 0; r < bs; ++r)
+        for (c = 0; c < bs; ++c)
+          dst16[r * dst_stride + c] = palette[map[(r + y) * stride + c + x]];
+    } else {
+      for (r = 0; r < bs; ++r)
+        for (c = 0; c < bs; ++c)
+          dst[r * dst_stride + c] =
+              (uint8_t)(palette[map[(r + y) * stride + c + x]]);
+    }
+#else
+    for (r = 0; r < bs; ++r)
+      for (c = 0; c < bs; ++c)
+        dst[r * dst_stride + c] = palette[map[(r + y) * stride + c + x]];
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    return;
+  }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    build_intra_predictors_high(
+        xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
+        have_top ? VPXMIN(txwpx, xr + txwpx) : 0,
+        have_top && have_right ? VPXMIN(txwpx, xr) : 0,
+        have_left ? VPXMIN(txhpx, yd + txhpx) : 0,
+        have_bottom && have_left ? VPXMIN(txhpx, yd) : 0, plane);
+    return;
+  }
+#endif
+  build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
+                         have_top ? VPXMIN(txwpx, xr + txwpx) : 0,
+                         have_top && have_right ? VPXMIN(txwpx, xr) : 0,
+                         have_left ? VPXMIN(txhpx, yd + txhpx) : 0,
+                         have_bottom && have_left ? VPXMIN(txhpx, yd) : 0,
+                         plane);
+}
+
+void vp10_init_intra_predictors(void) {
+  once(vp10_init_intra_predictors_internal);
+}
diff --git a/av1/common/reconintra.h b/av1/common/reconintra.h
new file mode 100644
index 0000000..d20b5a4
--- /dev/null
+++ b/av1/common/reconintra.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_RECONINTRA_H_
+#define VP10_COMMON_RECONINTRA_H_
+
+#include "aom/vpx_integer.h"
+#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_init_intra_predictors(void);
+
+void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
+                              TX_SIZE tx_size, PREDICTION_MODE mode,
+                              const uint8_t *ref, int ref_stride, uint8_t *dst,
+                              int dst_stride, int aoff, int loff, int plane);
+#if CONFIG_EXT_INTRA
+int vp10_is_intra_filter_switchable(int angle);
+#endif  // CONFIG_EXT_INTRA
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_RECONINTRA_H_
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
new file mode 100644
index 0000000..4d4c9fc
--- /dev/null
+++ b/av1/common/restoration.c
@@ -0,0 +1,642 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/restoration.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+#define BILATERAL_PARAM_PRECISION 16
+#define BILATERAL_AMP_RANGE 256
+#define BILATERAL_AMP_RANGE_SYM (2 * BILATERAL_AMP_RANGE + 1)
+
+static uint8_t
+    bilateral_filter_coeffs_r_kf[BILATERAL_LEVELS_KF][BILATERAL_AMP_RANGE_SYM];
+static uint8_t
+    bilateral_filter_coeffs_r[BILATERAL_LEVELS][BILATERAL_AMP_RANGE_SYM];
+static uint8_t bilateral_filter_coeffs_s_kf[BILATERAL_LEVELS_KF]
+                                           [RESTORATION_WIN][RESTORATION_WIN];
+static uint8_t bilateral_filter_coeffs_s[BILATERAL_LEVELS][RESTORATION_WIN]
+                                        [RESTORATION_WIN];
+
+typedef struct bilateral_params {
+  int sigma_x;  // spatial variance x
+  int sigma_y;  // spatial variance y
+  int sigma_r;  // range variance
+} BilateralParamsType;
+
+static BilateralParamsType bilateral_level_to_params_arr[BILATERAL_LEVELS] = {
+  // Values are rounded to 1/16th precision.
+  { 8, 9, 30 },   { 9, 8, 30 },   { 9, 11, 32 },  { 11, 9, 32 },
+  { 14, 14, 32 }, { 18, 18, 36 }, { 24, 24, 40 }, { 32, 32, 40 },
+};
+
+static BilateralParamsType
+    bilateral_level_to_params_arr_kf[BILATERAL_LEVELS_KF] = {
+      // Values are rounded to 1/16th precision.
+      { 8, 8, 30 },   { 9, 9, 32 },   { 10, 10, 32 }, { 12, 12, 32 },
+      { 14, 14, 32 }, { 18, 18, 36 }, { 24, 24, 40 }, { 30, 30, 44 },
+      { 36, 36, 48 }, { 42, 42, 48 }, { 48, 48, 48 }, { 48, 48, 56 },
+      { 56, 56, 48 }, { 56, 56, 56 }, { 56, 56, 64 }, { 64, 64, 48 },
+    };
+
+typedef void (*restore_func_type)(uint8_t *data8, int width, int height,
+                                  int stride, RestorationInternal *rst,
+                                  uint8_t *tmpdata8, int tmpstride);
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef void (*restore_func_highbd_type)(uint8_t *data8, int width, int height,
+                                         int stride, RestorationInternal *rst,
+                                         uint8_t *tmpdata8, int tmpstride,
+                                         int bit_depth);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+static INLINE BilateralParamsType vp10_bilateral_level_to_params(int index,
+                                                                 int kf) {
+  return kf ? bilateral_level_to_params_arr_kf[index]
+            : bilateral_level_to_params_arr[index];
+}
+
+typedef struct TileParams {
+  int width;
+  int height;
+} TileParams;
+
+static TileParams restoration_tile_sizes[RESTORATION_TILESIZES] = {
+  { 64, 64 }, { 128, 128 }, { 256, 256 }
+};
+
+void vp10_get_restoration_tile_size(int tilesize, int width, int height,
+                                    int *tile_width, int *tile_height,
+                                    int *nhtiles, int *nvtiles) {
+  *tile_width = (tilesize < 0)
+                    ? width
+                    : VPXMIN(restoration_tile_sizes[tilesize].width, width);
+  *tile_height = (tilesize < 0)
+                     ? height
+                     : VPXMIN(restoration_tile_sizes[tilesize].height, height);
+  *nhtiles = (width + (*tile_width >> 1)) / *tile_width;
+  *nvtiles = (height + (*tile_height >> 1)) / *tile_height;
+}
+
+int vp10_get_restoration_ntiles(int tilesize, int width, int height) {
+  int nhtiles, nvtiles;
+  int tile_width, tile_height;
+  vp10_get_restoration_tile_size(tilesize, width, height, &tile_width,
+                                 &tile_height, &nhtiles, &nvtiles);
+  return (nhtiles * nvtiles);
+}
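+
+// Worked example: a 1920x1080 luma plane with 256x256 restoration tiles
+// gives nhtiles = (1920 + 128) / 256 = 8 and nvtiles = (1080 + 128) / 256
+// = 4. A trailing remainder of at least half a tile gets a tile of its
+// own; anything smaller is absorbed into the last full tile.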
+
+void vp10_loop_restoration_precal(void) {
+  int i;
+  for (i = 0; i < BILATERAL_LEVELS_KF; i++) {
+    const BilateralParamsType param = vp10_bilateral_level_to_params(i, 1);
+    const int sigma_x = param.sigma_x;
+    const int sigma_y = param.sigma_y;
+    const int sigma_r = param.sigma_r;
+    const double sigma_r_d = (double)sigma_r / BILATERAL_PARAM_PRECISION;
+    const double sigma_x_d = (double)sigma_x / BILATERAL_PARAM_PRECISION;
+    const double sigma_y_d = (double)sigma_y / BILATERAL_PARAM_PRECISION;
+
+    uint8_t *fr = bilateral_filter_coeffs_r_kf[i] + BILATERAL_AMP_RANGE;
+    int j, x, y;
+    for (j = 0; j <= BILATERAL_AMP_RANGE; j++) {
+      fr[j] = (uint8_t)(0.5 +
+                        RESTORATION_FILT_STEP *
+                            exp(-(j * j) / (2 * sigma_r_d * sigma_r_d)));
+      fr[-j] = fr[j];
+    }
+    for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; y++) {
+      for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; x++) {
+        bilateral_filter_coeffs_s_kf
+            [i][y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] =
+                (uint8_t)(0.5 +
+                          RESTORATION_FILT_STEP *
+                              exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) -
+                                  (y * y) / (2 * sigma_y_d * sigma_y_d)));
+      }
+    }
+  }
+  for (i = 0; i < BILATERAL_LEVELS; i++) {
+    const BilateralParamsType param = vp10_bilateral_level_to_params(i, 0);
+    const int sigma_x = param.sigma_x;
+    const int sigma_y = param.sigma_y;
+    const int sigma_r = param.sigma_r;
+    const double sigma_r_d = (double)sigma_r / BILATERAL_PARAM_PRECISION;
+    const double sigma_x_d = (double)sigma_x / BILATERAL_PARAM_PRECISION;
+    const double sigma_y_d = (double)sigma_y / BILATERAL_PARAM_PRECISION;
+
+    uint8_t *fr = bilateral_filter_coeffs_r[i] + BILATERAL_AMP_RANGE;
+    int j, x, y;
+    for (j = 0; j <= BILATERAL_AMP_RANGE; j++) {
+      fr[j] = (uint8_t)(0.5 +
+                        RESTORATION_FILT_STEP *
+                            exp(-(j * j) / (2 * sigma_r_d * sigma_r_d)));
+      fr[-j] = fr[j];
+    }
+    for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; y++) {
+      for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; x++) {
+        bilateral_filter_coeffs_s
+            [i][y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] =
+            (uint8_t)(0.5 +
+                      RESTORATION_FILT_STEP *
+                          exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) -
+                              (y * y) / (2 * sigma_y_d * sigma_y_d)));
+      }
+    }
+  }
+}
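+
+// The tables built above sample the standard bilateral kernel: a tap at
+// spatial offset (x, y) whose value differs from the center pixel by d is
+// weighted by
+//   w(x, y, d) = exp(-x^2 / (2 * sigma_x^2) - y^2 / (2 * sigma_y^2)) *
+//                exp(-d^2 / (2 * sigma_r^2)),
+// scaled by RESTORATION_FILT_STEP and split into the 2-D spatial LUTs
+// (bilateral_filter_coeffs_s*) and the 1-D range LUTs
+// (bilateral_filter_coeffs_r*).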
+
+int vp10_bilateral_level_bits(const VP10_COMMON *const cm) {
+  return cm->frame_type == KEY_FRAME ? BILATERAL_LEVEL_BITS_KF
+                                     : BILATERAL_LEVEL_BITS;
+}
+
+void vp10_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
+                                int kf, int width, int height) {
+  int i, tile_idx;
+  rst->restoration_type = rsi->restoration_type;
+  rst->subsampling_x = 0;
+  rst->subsampling_y = 0;
+  if (rsi->restoration_type == RESTORE_BILATERAL) {
+    rst->tilesize_index = BILATERAL_TILESIZE;
+    rst->ntiles =
+        vp10_get_restoration_ntiles(rst->tilesize_index, width, height);
+    vp10_get_restoration_tile_size(rst->tilesize_index, width, height,
+                                   &rst->tile_width, &rst->tile_height,
+                                   &rst->nhtiles, &rst->nvtiles);
+    rst->bilateral_level = rsi->bilateral_level;
+    rst->wr_lut = (uint8_t **)malloc(sizeof(*rst->wr_lut) * rst->ntiles);
+    assert(rst->wr_lut != NULL);
+    rst->wx_lut = (uint8_t(**)[RESTORATION_WIN])malloc(sizeof(*rst->wx_lut) *
+                                                       rst->ntiles);
+    assert(rst->wx_lut != NULL);
+    for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+      const int level = rsi->bilateral_level[tile_idx];
+      if (level >= 0) {
+        rst->wr_lut[tile_idx] = kf ? bilateral_filter_coeffs_r_kf[level]
+                                   : bilateral_filter_coeffs_r[level];
+        rst->wx_lut[tile_idx] = kf ? bilateral_filter_coeffs_s_kf[level]
+                                   : bilateral_filter_coeffs_s[level];
+      }
+    }
+  } else if (rsi->restoration_type == RESTORE_WIENER) {
+    rst->tilesize_index = WIENER_TILESIZE;
+    rst->ntiles =
+        vp10_get_restoration_ntiles(rst->tilesize_index, width, height);
+    vp10_get_restoration_tile_size(rst->tilesize_index, width, height,
+                                   &rst->tile_width, &rst->tile_height,
+                                   &rst->nhtiles, &rst->nvtiles);
+    rst->wiener_level = rsi->wiener_level;
+    rst->vfilter =
+        (int(*)[RESTORATION_WIN])malloc(sizeof(*rst->vfilter) * rst->ntiles);
+    assert(rst->vfilter != NULL);
+    rst->hfilter =
+        (int(*)[RESTORATION_WIN])malloc(sizeof(*rst->hfilter) * rst->ntiles);
+    assert(rst->hfilter != NULL);
+    for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+      rst->vfilter[tile_idx][RESTORATION_HALFWIN] =
+          rst->hfilter[tile_idx][RESTORATION_HALFWIN] = RESTORATION_FILT_STEP;
+      for (i = 0; i < RESTORATION_HALFWIN; ++i) {
+        rst->vfilter[tile_idx][i] =
+            rst->vfilter[tile_idx][RESTORATION_WIN - 1 - i] =
+                rsi->vfilter[tile_idx][i];
+        rst->hfilter[tile_idx][i] =
+            rst->hfilter[tile_idx][RESTORATION_WIN - 1 - i] =
+                rsi->hfilter[tile_idx][i];
+        rst->vfilter[tile_idx][RESTORATION_HALFWIN] -=
+            2 * rsi->vfilter[tile_idx][i];
+        rst->hfilter[tile_idx][RESTORATION_HALFWIN] -=
+            2 * rsi->hfilter[tile_idx][i];
+      }
+    }
+  }
+}
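+
+// Note that the wr_lut/wx_lut and vfilter/hfilter arrays malloc'd above
+// are owned by the RestorationInternal struct and are freed at the end of
+// vp10_loop_restoration_rows() once the frame has been filtered.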
+
+static void loop_bilateral_filter(uint8_t *data, int width, int height,
+                                  int stride, RestorationInternal *rst,
+                                  uint8_t *tmpdata, int tmpstride) {
+  int i, j, tile_idx, htile_idx, vtile_idx;
+  int h_start, h_end, v_start, v_end;
+  int tile_width, tile_height;
+
+  tile_width = rst->tile_width >> rst->subsampling_x;
+  tile_height = rst->tile_height >> rst->subsampling_y;
+
+  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+    uint8_t *data_p, *tmpdata_p;
+    const uint8_t *wr_lut_ = rst->wr_lut[tile_idx] + BILATERAL_AMP_RANGE;
+
+    if (rst->bilateral_level[tile_idx] < 0) continue;
+
+    htile_idx = tile_idx % rst->nhtiles;
+    vtile_idx = tile_idx / rst->nhtiles;
+    h_start =
+        htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
+                                           : (width - RESTORATION_HALFWIN);
+    v_start =
+        vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+                                           : (height - RESTORATION_HALFWIN);
+
+    data_p = data + h_start + v_start * stride;
+    tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+
+    for (i = 0; i < (v_end - v_start); ++i) {
+      for (j = 0; j < (h_end - h_start); ++j) {
+        int x, y;
+        int flsum = 0, wtsum = 0, wt;
+        uint8_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride;
+        for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
+          for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
+            wt = (int)rst->wx_lut[tile_idx][y + RESTORATION_HALFWIN]
+                                 [x + RESTORATION_HALFWIN] *
+                 (int)wr_lut_[data_p2[x] - data_p[j]];
+            wtsum += wt;
+            flsum += wt * data_p2[x];
+          }
+          data_p2 += stride;
+        }
+        if (wtsum > 0)
+          tmpdata_p[j] = clip_pixel((int)((flsum + wtsum / 2) / wtsum));
+        else
+          tmpdata_p[j] = data_p[j];
+      }
+      tmpdata_p += tmpstride;
+      data_p += stride;
+    }
+    for (i = v_start; i < v_end; ++i) {
+      memcpy(data + i * stride + h_start, tmpdata + i * tmpstride + h_start,
+             (h_end - h_start) * sizeof(*data));
+    }
+  }
+}
+
+uint8_t hor_sym_filter(uint8_t *d, int *hfilter) {
+  int32_t s =
+      (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * hfilter[RESTORATION_HALFWIN];
+  int i;
+  for (i = 1; i <= RESTORATION_HALFWIN; ++i)
+    s += (d[i] + d[-i]) * hfilter[RESTORATION_HALFWIN + i];
+  return clip_pixel(s >> RESTORATION_FILT_BITS);
+}
+
+uint8_t ver_sym_filter(uint8_t *d, int stride, int *vfilter) {
+  int32_t s =
+      (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * vfilter[RESTORATION_HALFWIN];
+  int i;
+  for (i = 1; i <= RESTORATION_HALFWIN; ++i)
+    s += (d[i * stride] + d[-i * stride]) * vfilter[RESTORATION_HALFWIN + i];
+  return clip_pixel(s >> RESTORATION_FILT_BITS);
+}
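+
+// Both helpers rely on the symmetry established in
+// vp10_loop_restoration_init(): the side taps are mirrored around the
+// center and the center tap is set so that all RESTORATION_WIN taps sum to
+// RESTORATION_FILT_STEP. Assuming RESTORATION_FILT_STEP ==
+// 1 << RESTORATION_FILT_BITS, a constant input v therefore survives
+// unchanged:
+//   s = (1 << (RESTORATION_FILT_BITS - 1)) + v * RESTORATION_FILT_STEP
+//   s >> RESTORATION_FILT_BITS == v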
+
+static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
+                               RestorationInternal *rst, uint8_t *tmpdata,
+                               int tmpstride) {
+  int i, j, tile_idx, htile_idx, vtile_idx;
+  int h_start, h_end, v_start, v_end;
+  int tile_width, tile_height;
+  uint8_t *data_p, *tmpdata_p;
+
+  tile_width = rst->tile_width >> rst->subsampling_x;
+  tile_height = rst->tile_height >> rst->subsampling_y;
+
+  // Initialize tmp buffer
+  data_p = data;
+  tmpdata_p = tmpdata;
+  for (i = 0; i < height; ++i) {
+    memcpy(tmpdata_p, data_p, sizeof(*data_p) * width);
+    data_p += stride;
+    tmpdata_p += tmpstride;
+  }
+
+  // Filter row-wise tile-by-tile
+  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+    if (rst->wiener_level[tile_idx] == 0) continue;
+    htile_idx = tile_idx % rst->nhtiles;
+    vtile_idx = tile_idx / rst->nhtiles;
+    h_start =
+        htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
+                                           : (width - RESTORATION_HALFWIN);
+    v_start = vtile_idx * tile_height;
+    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+                                           : height;
+    data_p = data + h_start + v_start * stride;
+    tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+    for (i = 0; i < (v_end - v_start); ++i) {
+      for (j = 0; j < (h_end - h_start); ++j) {
+        *tmpdata_p++ = hor_sym_filter(data_p++, rst->hfilter[tile_idx]);
+      }
+      data_p += stride - (h_end - h_start);
+      tmpdata_p += tmpstride - (h_end - h_start);
+    }
+  }
+
+  // Filter column-wise tile-by-tile (bands of thickness RESTORATION_HALFWIN
+  // at top and bottom of tiles allow filtering overlap, and are not optimally
+  // filtered)
+  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+    if (rst->wiener_level[tile_idx] == 0) continue;
+    htile_idx = tile_idx % rst->nhtiles;
+    vtile_idx = tile_idx / rst->nhtiles;
+    h_start = htile_idx * tile_width;
+    h_end =
+        (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width) : width;
+    v_start =
+        vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+                                           : (height - RESTORATION_HALFWIN);
+    data_p = data + h_start + v_start * stride;
+    tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+    for (i = 0; i < (v_end - v_start); ++i) {
+      for (j = 0; j < (h_end - h_start); ++j) {
+        *data_p++ =
+            ver_sym_filter(tmpdata_p++, tmpstride, rst->vfilter[tile_idx]);
+      }
+      data_p += stride - (h_end - h_start);
+      tmpdata_p += tmpstride - (h_end - h_start);
+    }
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void loop_bilateral_filter_highbd(uint8_t *data8, int width, int height,
+                                         int stride, RestorationInternal *rst,
+                                         uint8_t *tmpdata8, int tmpstride,
+                                         int bit_depth) {
+  int i, j, tile_idx, htile_idx, vtile_idx;
+  int h_start, h_end, v_start, v_end;
+  int tile_width, tile_height;
+
+  uint16_t *data = CONVERT_TO_SHORTPTR(data8);
+  uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
+
+  tile_width = rst->tile_width >> rst->subsampling_x;
+  tile_height = rst->tile_height >> rst->subsampling_y;
+
+  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+    uint16_t *data_p, *tmpdata_p;
+    const uint8_t *wr_lut_ = rst->wr_lut[tile_idx] + BILATERAL_AMP_RANGE;
+
+    if (rst->bilateral_level[tile_idx] < 0) continue;
+
+    htile_idx = tile_idx % rst->nhtiles;
+    vtile_idx = tile_idx / rst->nhtiles;
+    h_start =
+        htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
+                                           : (width - RESTORATION_HALFWIN);
+    v_start =
+        vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+                                           : (height - RESTORATION_HALFWIN);
+
+    data_p = data + h_start + v_start * stride;
+    tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+
+    for (i = 0; i < (v_end - v_start); ++i) {
+      for (j = 0; j < (h_end - h_start); ++j) {
+        int x, y;
+        int flsum = 0, wtsum = 0, wt;
+        uint16_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride;
+        for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
+          for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
+            wt = (int)rst->wx_lut[tile_idx][y + RESTORATION_HALFWIN]
+                                 [x + RESTORATION_HALFWIN] *
+                 (int)wr_lut_[data_p2[x] - data_p[j]];
+            wtsum += wt;
+            flsum += wt * data_p2[x];
+          }
+          data_p2 += stride;
+        }
+        if (wtsum > 0)
+          tmpdata_p[j] =
+              clip_pixel_highbd((int)((flsum + wtsum / 2) / wtsum), bit_depth);
+        else
+          tmpdata_p[j] = data_p[j];
+      }
+      tmpdata_p += tmpstride;
+      data_p += stride;
+    }
+    for (i = v_start; i < v_end; ++i) {
+      memcpy(data + i * stride + h_start, tmpdata + i * tmpstride + h_start,
+             (h_end - h_start) * sizeof(*data));
+    }
+  }
+}
+
+uint16_t hor_sym_filter_highbd(uint16_t *d, int *hfilter, int bd) {
+  int32_t s =
+      (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * hfilter[RESTORATION_HALFWIN];
+  int i;
+  for (i = 1; i <= RESTORATION_HALFWIN; ++i)
+    s += (d[i] + d[-i]) * hfilter[RESTORATION_HALFWIN + i];
+  return clip_pixel_highbd(s >> RESTORATION_FILT_BITS, bd);
+}
+
+uint16_t ver_sym_filter_highbd(uint16_t *d, int stride, int *vfilter, int bd) {
+  int32_t s =
+      (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * vfilter[RESTORATION_HALFWIN];
+  int i;
+  for (i = 1; i <= RESTORATION_HALFWIN; ++i)
+    s += (d[i * stride] + d[-i * stride]) * vfilter[RESTORATION_HALFWIN + i];
+  return clip_pixel_highbd(s >> RESTORATION_FILT_BITS, bd);
+}
+
+static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
+                                      int stride, RestorationInternal *rst,
+                                      uint8_t *tmpdata8, int tmpstride,
+                                      int bit_depth) {
+  uint16_t *data = CONVERT_TO_SHORTPTR(data8);
+  uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
+  int i, j, tile_idx, htile_idx, vtile_idx;
+  int h_start, h_end, v_start, v_end;
+  int tile_width, tile_height;
+  uint16_t *data_p, *tmpdata_p;
+
+  tile_width = rst->tile_width >> rst->subsampling_x;
+  tile_height = rst->tile_height >> rst->subsampling_y;
+
+  // Initialize tmp buffer
+  data_p = data;
+  tmpdata_p = tmpdata;
+  for (i = 0; i < height; ++i) {
+    memcpy(tmpdata_p, data_p, sizeof(*data_p) * width);
+    data_p += stride;
+    tmpdata_p += tmpstride;
+  }
+
+  // Filter row-wise tile-by-tile
+  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+    if (rst->wiener_level[tile_idx] == 0) continue;
+    htile_idx = tile_idx % rst->nhtiles;
+    vtile_idx = tile_idx / rst->nhtiles;
+    h_start =
+        htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
+                                           : (width - RESTORATION_HALFWIN);
+    v_start = vtile_idx * tile_height;
+    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+                                           : height;
+    data_p = data + h_start + v_start * stride;
+    tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+    for (i = 0; i < (v_end - v_start); ++i) {
+      for (j = 0; j < (h_end - h_start); ++j) {
+        *tmpdata_p++ =
+            hor_sym_filter_highbd(data_p++, rst->hfilter[tile_idx], bit_depth);
+      }
+      data_p += stride - (h_end - h_start);
+      tmpdata_p += tmpstride - (h_end - h_start);
+    }
+  }
+
+  // Filter column-wise tile-by-tile (bands of thickness RESTORATION_HALFWIN
+  // at top and bottom of tiles allow filtering overlap, and are not optimally
+  // filtered)
+  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+    if (rst->wiener_level[tile_idx] == 0) continue;
+    htile_idx = tile_idx % rst->nhtiles;
+    vtile_idx = tile_idx / rst->nhtiles;
+    h_start = htile_idx * tile_width;
+    h_end =
+        (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width) : width;
+    v_start =
+        vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+                                           : (height - RESTORATION_HALFWIN);
+    data_p = data + h_start + v_start * stride;
+    tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+    for (i = 0; i < (v_end - v_start); ++i) {
+      for (j = 0; j < (h_end - h_start); ++j) {
+        *data_p++ = ver_sym_filter_highbd(tmpdata_p++, tmpstride,
+                                          rst->vfilter[tile_idx], bit_depth);
+      }
+      data_p += stride - (h_end - h_start);
+      tmpdata_p += tmpstride - (h_end - h_start);
+    }
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                                int start_mi_row, int end_mi_row, int y_only) {
+  const int ywidth = frame->y_crop_width;
+  const int ystride = frame->y_stride;
+  const int uvwidth = frame->uv_crop_width;
+  const int uvstride = frame->uv_stride;
+  const int ystart = start_mi_row << MI_SIZE_LOG2;
+  const int uvstart = ystart >> cm->subsampling_y;
+  int yend = end_mi_row << MI_SIZE_LOG2;
+  int uvend = yend >> cm->subsampling_y;
+  restore_func_type restore_func =
+      cm->rst_internal.restoration_type == RESTORE_BILATERAL
+          ? loop_bilateral_filter
+          : loop_wiener_filter;
+#if CONFIG_VP9_HIGHBITDEPTH
+  restore_func_highbd_type restore_func_highbd =
+      cm->rst_internal.restoration_type == RESTORE_BILATERAL
+          ? loop_bilateral_filter_highbd
+          : loop_wiener_filter_highbd;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  YV12_BUFFER_CONFIG tmp_buf;
+  memset(&tmp_buf, 0, sizeof(YV12_BUFFER_CONFIG));
+
+  yend = VPXMIN(yend, cm->height);
+  uvend = VPXMIN(uvend, cm->subsampling_y ? (cm->height + 1) >> 1 : cm->height);
+
+  if (vpx_realloc_frame_buffer(
+          &tmp_buf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+          cm->use_highbitdepth,
+#endif
+          VPX_DEC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL) < 0)
+    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate tmp restoration buffer");
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (cm->use_highbitdepth)
+    restore_func_highbd(frame->y_buffer + ystart * ystride, ywidth,
+                        yend - ystart, ystride, &cm->rst_internal,
+                        tmp_buf.y_buffer + ystart * tmp_buf.y_stride,
+                        tmp_buf.y_stride, cm->bit_depth);
+  else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    restore_func(frame->y_buffer + ystart * ystride, ywidth, yend - ystart,
+                 ystride, &cm->rst_internal,
+                 tmp_buf.y_buffer + ystart * tmp_buf.y_stride,
+                 tmp_buf.y_stride);
+  if (!y_only) {
+    cm->rst_internal.subsampling_x = cm->subsampling_x;
+    cm->rst_internal.subsampling_y = cm->subsampling_y;
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (cm->use_highbitdepth) {
+      restore_func_highbd(frame->u_buffer + uvstart * uvstride, uvwidth,
+                          uvend - uvstart, uvstride, &cm->rst_internal,
+                          tmp_buf.u_buffer + uvstart * tmp_buf.uv_stride,
+                          tmp_buf.uv_stride, cm->bit_depth);
+      restore_func_highbd(frame->v_buffer + uvstart * uvstride, uvwidth,
+                          uvend - uvstart, uvstride, &cm->rst_internal,
+                          tmp_buf.v_buffer + uvstart * tmp_buf.uv_stride,
+                          tmp_buf.uv_stride, cm->bit_depth);
+    } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+      restore_func(frame->u_buffer + uvstart * uvstride, uvwidth,
+                   uvend - uvstart, uvstride, &cm->rst_internal,
+                   tmp_buf.u_buffer + uvstart * tmp_buf.uv_stride,
+                   tmp_buf.uv_stride);
+      restore_func(frame->v_buffer + uvstart * uvstride, uvwidth,
+                   uvend - uvstart, uvstride, &cm->rst_internal,
+                   tmp_buf.v_buffer + uvstart * tmp_buf.uv_stride,
+                   tmp_buf.uv_stride);
+#if CONFIG_VP9_HIGHBITDEPTH
+    }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  }
+  vpx_free_frame_buffer(&tmp_buf);
+  if (cm->rst_internal.restoration_type == RESTORE_BILATERAL) {
+    free(cm->rst_internal.wr_lut);
+    cm->rst_internal.wr_lut = NULL;
+    free(cm->rst_internal.wx_lut);
+    cm->rst_internal.wx_lut = NULL;
+  }
+  if (cm->rst_internal.restoration_type == RESTORE_WIENER) {
+    free(cm->rst_internal.vfilter);
+    cm->rst_internal.vfilter = NULL;
+    free(cm->rst_internal.hfilter);
+    cm->rst_internal.hfilter = NULL;
+  }
+}
+
+void vp10_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                                 RestorationInfo *rsi, int y_only,
+                                 int partial_frame) {
+  int start_mi_row, end_mi_row, mi_rows_to_filter;
+  if (rsi->restoration_type != RESTORE_NONE) {
+    start_mi_row = 0;
+    mi_rows_to_filter = cm->mi_rows;
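+    // For a partial-frame pass (e.g. while searching for restoration
+    // parameters), filter only a band of about one eighth of the frame's
+    // rows starting at the vertical midpoint, aligned to 8 mi units.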
+    if (partial_frame && cm->mi_rows > 8) {
+      start_mi_row = cm->mi_rows >> 1;
+      start_mi_row &= 0xfffffff8;
+      mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
+    }
+    end_mi_row = start_mi_row + mi_rows_to_filter;
+    vp10_loop_restoration_init(&cm->rst_internal, rsi,
+                               cm->frame_type == KEY_FRAME, cm->width,
+                               cm->height);
+    vp10_loop_restoration_rows(frame, cm, start_mi_row, end_mi_row, y_only);
+  }
+}
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
new file mode 100644
index 0000000..c1e937a
--- /dev/null
+++ b/av1/common/restoration.h
@@ -0,0 +1,109 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_RESTORATION_H_
+#define VP10_COMMON_RESTORATION_H_
+
+#include "aom_ports/mem.h"
+#include "./vpx_config.h"
+
+#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BILATERAL_LEVEL_BITS_KF 4
+#define BILATERAL_LEVELS_KF (1 << BILATERAL_LEVEL_BITS_KF)
+#define BILATERAL_LEVEL_BITS 3
+#define BILATERAL_LEVELS (1 << BILATERAL_LEVEL_BITS)
+// #define DEF_BILATERAL_LEVEL     2
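+// Key frames signal bilateral levels with 4 bits (16 levels); other frames
+// use 3 bits (8 levels).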
+
+#define RESTORATION_TILESIZES 3
+#define BILATERAL_TILESIZE 0
+#define WIENER_TILESIZE 2
+
+#define RESTORATION_HALFWIN 3
+#define RESTORATION_HALFWIN1 (RESTORATION_HALFWIN + 1)
+#define RESTORATION_WIN (2 * RESTORATION_HALFWIN + 1)
+#define RESTORATION_WIN2 ((RESTORATION_WIN) * (RESTORATION_WIN))
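+// With RESTORATION_HALFWIN == 3 this is a 7x7 (RESTORATION_WIN == 7,
+// RESTORATION_WIN2 == 49) filter window.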
+
+#define RESTORATION_FILT_BITS 7
+#define RESTORATION_FILT_STEP (1 << RESTORATION_FILT_BITS)
+
+#define WIENER_FILT_TAP0_MINV (-5)
+#define WIENER_FILT_TAP1_MINV (-23)
+#define WIENER_FILT_TAP2_MINV (-16)
+
+#define WIENER_FILT_TAP0_BITS 4
+#define WIENER_FILT_TAP1_BITS 5
+#define WIENER_FILT_TAP2_BITS 6
+
+#define WIENER_FILT_BITS \
+  ((WIENER_FILT_TAP0_BITS + WIENER_FILT_TAP1_BITS + WIENER_FILT_TAP2_BITS) * 2)
+
+#define WIENER_FILT_TAP0_MAXV \
+  (WIENER_FILT_TAP0_MINV - 1 + (1 << WIENER_FILT_TAP0_BITS))
+#define WIENER_FILT_TAP1_MAXV \
+  (WIENER_FILT_TAP1_MINV - 1 + (1 << WIENER_FILT_TAP1_BITS))
+#define WIENER_FILT_TAP2_MAXV \
+  (WIENER_FILT_TAP2_MINV - 1 + (1 << WIENER_FILT_TAP2_BITS))
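+// With the values above, the tap ranges are: tap0 in [-5, 10], tap1 in
+// [-23, 8] and tap2 in [-16, 47] (e.g. -5 - 1 + (1 << 4) == 10), all in
+// RESTORATION_FILT_STEP (1 << 7) units.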
+
+typedef enum {
+  RESTORE_NONE,
+  RESTORE_BILATERAL,
+  RESTORE_WIENER,
+} RestorationType;
+
+typedef struct {
+  RestorationType restoration_type;
+  // Bilateral filter
+  int *bilateral_level;
+  // Wiener filter
+  int *wiener_level;
+  int (*vfilter)[RESTORATION_HALFWIN], (*hfilter)[RESTORATION_HALFWIN];
+} RestorationInfo;
+
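+// Working state derived from RestorationInfo by vp10_loop_restoration_init():
+// the tile layout plus, for the bilateral filter, precomputed weight lookup
+// tables (wr_lut/wx_lut).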
+typedef struct {
+  RestorationType restoration_type;
+  int subsampling_x;
+  int subsampling_y;
+  int tilesize_index;
+  int ntiles;
+  int tile_width, tile_height;
+  int nhtiles, nvtiles;
+  // Bilateral filter
+  int *bilateral_level;
+  uint8_t (**wx_lut)[RESTORATION_WIN];
+  uint8_t **wr_lut;
+  // Wiener filter
+  int *wiener_level;
+  int (*vfilter)[RESTORATION_WIN], (*hfilter)[RESTORATION_WIN];
+} RestorationInternal;
+
+int vp10_bilateral_level_bits(const struct VP10Common *const cm);
+int vp10_get_restoration_ntiles(int tilesize, int width, int height);
+void vp10_get_restoration_tile_size(int tilesize, int width, int height,
+                                    int *tile_width, int *tile_height,
+                                    int *nhtiles, int *nvtiles);
+void vp10_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
+                                int kf, int width, int height);
+void vp10_loop_restoration_frame(YV12_BUFFER_CONFIG *frame,
+                                 struct VP10Common *cm, RestorationInfo *rsi,
+                                 int y_only, int partial_frame);
+void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame,
+                                struct VP10Common *cm, int start_mi_row,
+                                int end_mi_row, int y_only);
+void vp10_loop_restoration_precal(void);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_RESTORATION_H_
diff --git a/av1/common/scale.c b/av1/common/scale.c
new file mode 100644
index 0000000..6bd3b74
--- /dev/null
+++ b/av1/common/scale.c
@@ -0,0 +1,183 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_dsp_rtcd.h"
+#include "av1/common/filter.h"
+#include "av1/common/scale.h"
+#include "aom_dsp/vpx_filter.h"
+
+static INLINE int scaled_x(int val, const struct scale_factors *sf) {
+  return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT);
+}
+
+static INLINE int scaled_y(int val, const struct scale_factors *sf) {
+  return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT);
+}
+
+static int unscaled_value(int val, const struct scale_factors *sf) {
+  (void)sf;
+  return val;
+}
+
+static int get_fixed_point_scale_factor(int other_size, int this_size) {
+  // Calculate scaling factor once for each reference frame
+  // and use fixed point scaling factors in decoding and encoding routines.
+  // Hardware implementations can calculate scale factor in device driver
+  // and use multiplication and shifting on hardware instead of division.
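+  // For example, a 1920-wide reference used by a 960-wide frame gives
+  // (1920 << 14) / 960 == 32768, i.e. 2.0 in Q14 (REF_SCALE_SHIFT) fixed
+  // point, with REF_NO_SCALE == (1 << 14) as the unity factor.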
+  return (other_size << REF_SCALE_SHIFT) / this_size;
+}
+
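+// Scales a motion vector from reference to current frame resolution. The
+// q4 offsets are the subpel phase that the block position (x, y) picks up
+// under scaling, so the scaled MV stays anchored to the scaled block origin.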
+MV32 vp10_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) {
+  const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK;
+  const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK;
+  const MV32 res = { scaled_y(mv->row, sf) + y_off_q4,
+                     scaled_x(mv->col, sf) + x_off_q4 };
+  return res;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
+                                        int other_h, int this_w, int this_h,
+                                        int use_highbd) {
+#else
+void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
+                                        int other_h, int this_w, int this_h) {
+#endif
+  if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
+    sf->x_scale_fp = REF_INVALID_SCALE;
+    sf->y_scale_fp = REF_INVALID_SCALE;
+    return;
+  }
+
+  sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w);
+  sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h);
+  sf->x_step_q4 = scaled_x(16, sf);
+  sf->y_step_q4 = scaled_y(16, sf);
+
+  if (vp10_is_scaled(sf)) {
+    sf->scale_value_x = scaled_x;
+    sf->scale_value_y = scaled_y;
+  } else {
+    sf->scale_value_x = unscaled_value;
+    sf->scale_value_y = unscaled_value;
+  }
+
+// TODO(agrange): Investigate the best choice of functions to use here
+// for EIGHTTAP_SMOOTH. Since it is not interpolating, we need to choose
+// what to do at full-pel offsets. The current selection, where the filter
+// is applied in one direction only, and not at all for 0,0, seems to give
+// the best quality, but it may be worth trying an additional mode that
+// also filters at full-pel offsets.
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+  sf->predict_ni[0][0][0] = vpx_convolve8_c;
+  sf->predict_ni[0][0][1] = vpx_convolve8_avg_c;
+  sf->predict_ni[0][1][0] = vpx_convolve8_c;
+  sf->predict_ni[0][1][1] = vpx_convolve8_avg_c;
+  sf->predict_ni[1][0][0] = vpx_convolve8_c;
+  sf->predict_ni[1][0][1] = vpx_convolve8_avg_c;
+  sf->predict_ni[1][1][0] = vpx_convolve8;
+  sf->predict_ni[1][1][1] = vpx_convolve8_avg;
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
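+  // Select convolution kernels by [subpel_x][subpel_y][avg]: a dimension
+  // that is neither scaled nor at a subpel offset can use plain copy/average,
+  // while any scaled dimension always needs the 8-tap convolution.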
+  if (sf->x_step_q4 == 16) {
+    if (sf->y_step_q4 == 16) {
+      // No scaling in either direction.
+      sf->predict[0][0][0] = vpx_convolve_copy;
+      sf->predict[0][0][1] = vpx_convolve_avg;
+      sf->predict[0][1][0] = vpx_convolve8_vert;
+      sf->predict[0][1][1] = vpx_convolve8_avg_vert;
+      sf->predict[1][0][0] = vpx_convolve8_horiz;
+      sf->predict[1][0][1] = vpx_convolve8_avg_horiz;
+    } else {
+      // No scaling in x direction. Must always scale in the y direction.
+      sf->predict[0][0][0] = vpx_convolve8_vert;
+      sf->predict[0][0][1] = vpx_convolve8_avg_vert;
+      sf->predict[0][1][0] = vpx_convolve8_vert;
+      sf->predict[0][1][1] = vpx_convolve8_avg_vert;
+      sf->predict[1][0][0] = vpx_convolve8;
+      sf->predict[1][0][1] = vpx_convolve8_avg;
+    }
+  } else {
+    if (sf->y_step_q4 == 16) {
+      // No scaling in the y direction. Must always scale in the x direction.
+      sf->predict[0][0][0] = vpx_convolve8_horiz;
+      sf->predict[0][0][1] = vpx_convolve8_avg_horiz;
+      sf->predict[0][1][0] = vpx_convolve8;
+      sf->predict[0][1][1] = vpx_convolve8_avg;
+      sf->predict[1][0][0] = vpx_convolve8_horiz;
+      sf->predict[1][0][1] = vpx_convolve8_avg_horiz;
+    } else {
+      // Must always scale in both directions.
+      sf->predict[0][0][0] = vpx_convolve8;
+      sf->predict[0][0][1] = vpx_convolve8_avg;
+      sf->predict[0][1][0] = vpx_convolve8;
+      sf->predict[0][1][1] = vpx_convolve8_avg;
+      sf->predict[1][0][0] = vpx_convolve8;
+      sf->predict[1][0][1] = vpx_convolve8_avg;
+    }
+  }
+  // 2D subpel motion always gets filtered in both directions.
+  sf->predict[1][1][0] = vpx_convolve8;
+  sf->predict[1][1][1] = vpx_convolve8_avg;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (use_highbd) {
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+    sf->highbd_predict_ni[0][0][0] = vpx_highbd_convolve8_c;
+    sf->highbd_predict_ni[0][0][1] = vpx_highbd_convolve8_avg_c;
+    sf->highbd_predict_ni[0][1][0] = vpx_highbd_convolve8_c;
+    sf->highbd_predict_ni[0][1][1] = vpx_highbd_convolve8_avg_c;
+    sf->highbd_predict_ni[1][0][0] = vpx_highbd_convolve8_c;
+    sf->highbd_predict_ni[1][0][1] = vpx_highbd_convolve8_avg_c;
+    sf->highbd_predict_ni[1][1][0] = vpx_highbd_convolve8;
+    sf->highbd_predict_ni[1][1][1] = vpx_highbd_convolve8_avg;
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+    if (sf->x_step_q4 == 16) {
+      if (sf->y_step_q4 == 16) {
+        // No scaling in either direction.
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve_copy;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve_avg;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz;
+      } else {
+        // No scaling in x direction. Must always scale in the y direction.
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_vert;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_vert;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg;
+      }
+    } else {
+      if (sf->y_step_q4 == 16) {
+        // No scaling in the y direction. Must always scale in the x direction.
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_horiz;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_horiz;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz;
+      } else {
+        // Must always scale in both directions.
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg;
+      }
+    }
+    // 2D subpel motion always gets filtered in both directions.
+    sf->highbd_predict[1][1][0] = vpx_highbd_convolve8;
+    sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}
diff --git a/av1/common/scale.h b/av1/common/scale.h
new file mode 100644
index 0000000..bb02601
--- /dev/null
+++ b/av1/common/scale.h
@@ -0,0 +1,79 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_SCALE_H_
+#define VP10_COMMON_SCALE_H_
+
+#include "av1/common/mv.h"
+#include "aom_dsp/vpx_convolve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define REF_SCALE_SHIFT 14
+#define REF_NO_SCALE (1 << REF_SCALE_SHIFT)
+#define REF_INVALID_SCALE -1
+
+struct scale_factors {
+  int x_scale_fp;  // horizontal fixed point scale factor
+  int y_scale_fp;  // vertical fixed point scale factor
+  int x_step_q4;
+  int y_step_q4;
+
+  int (*scale_value_x)(int val, const struct scale_factors *sf);
+  int (*scale_value_y)(int val, const struct scale_factors *sf);
+
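+  // Indexed by [subpel_x != 0][subpel_y != 0][avg]; see the kernel selection
+  // in vp10_setup_scale_factors_for_frame().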
+  convolve_fn_t predict[2][2][2];  // horiz, vert, avg
+#if CONFIG_VP9_HIGHBITDEPTH
+  highbd_convolve_fn_t highbd_predict[2][2][2];  // horiz, vert, avg
+#endif                                           // CONFIG_VP9_HIGHBITDEPTH
+
+// Functions for non-interpolating filters (those that also filter at zero,
+// i.e. full-pel, offsets)
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+  convolve_fn_t predict_ni[2][2][2];  // horiz, vert, avg
+#if CONFIG_VP9_HIGHBITDEPTH
+  highbd_convolve_fn_t highbd_predict_ni[2][2][2];  // horiz, vert, avg
+#endif                                              // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+};
+
+MV32 vp10_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
+                                        int other_h, int this_w, int this_h,
+                                        int use_highbd);
+#else
+void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
+                                        int other_h, int this_w, int this_h);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+static INLINE int vp10_is_valid_scale(const struct scale_factors *sf) {
+  return sf->x_scale_fp != REF_INVALID_SCALE &&
+         sf->y_scale_fp != REF_INVALID_SCALE;
+}
+
+static INLINE int vp10_is_scaled(const struct scale_factors *sf) {
+  return vp10_is_valid_scale(sf) &&
+         (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE);
+}
+
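+// A reference frame is usable only if it is no more than 2x larger and no
+// more than 16x smaller than the current frame in each dimension.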
+static INLINE int valid_ref_frame_size(int ref_width, int ref_height,
+                                       int this_width, int this_height) {
+  return 2 * this_width >= ref_width && 2 * this_height >= ref_height &&
+         this_width <= 16 * ref_width && this_height <= 16 * ref_height;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_SCALE_H_
diff --git a/av1/common/scan.c b/av1/common/scan.c
new file mode 100644
index 0000000..dbc36eb
--- /dev/null
+++ b/av1/common/scan.c
@@ -0,0 +1,4169 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/scan.h"
+
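+// Each scan table lists, in scan order, the raster index of the coefficient
+// to visit: default_* scans follow a diagonal zig-zag order, mcol_*/mrow_*
+// are strict column-major/row-major orders, and col_*/row_* are column- and
+// row-biased variants of the diagonal scan.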
+DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = {
+  0, 4, 1, 5, 8, 2, 12, 9, 3, 6, 13, 10, 7, 14, 11, 15,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x4[16]) = {
+  0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x4[16]) = {
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4[16]) = {
+  0, 4, 8, 1, 12, 5, 9, 2, 13, 6, 10, 3, 7, 14, 11, 15,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4[16]) = {
+  0, 1, 4, 2, 5, 3, 6, 8, 9, 7, 12, 10, 13, 11, 14, 15,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, default_scan_4x8[32]) = {
+  0,  1,  4,  5,  2,  8,  6,  9,  10, 3,  12, 7,  13, 11, 14, 16,
+  17, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8[32]) = {
+  0, 4, 8,  12, 16, 20, 24, 28, 1, 5, 9,  13, 17, 21, 25, 29,
+  2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x8[32]) = {
+  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_8x4[32]) = {
+  0,  1,  8,  9, 2,  16, 10, 17, 18, 3,  24, 11, 25, 19, 26, 4,
+  12, 27, 20, 5, 28, 13, 21, 29, 6,  14, 22, 30, 7,  15, 23, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x4[32]) = {
+  0, 8,  16, 24, 1, 9,  17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
+  4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x4[32]) = {
+  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = {
+  0,  8,  1,  16, 9,  2,  17, 24, 10, 3,  18, 25, 32, 11, 4,  26,
+  33, 19, 40, 12, 34, 27, 5,  41, 20, 48, 13, 35, 42, 28, 21, 6,
+  49, 56, 36, 43, 29, 7,  14, 50, 57, 44, 22, 37, 15, 51, 58, 30,
+  45, 23, 52, 59, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x8[64]) = {
+  0, 8,  16, 24, 32, 40, 48, 56, 1, 9,  17, 25, 33, 41, 49, 57,
+  2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59,
+  4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61,
+  6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x8[64]) = {
+  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+};
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8[64]) = {
+  0,  8,  16, 1,  24, 9,  32, 17, 2,  40, 25, 10, 33, 18, 48, 3,
+  26, 41, 11, 56, 19, 34, 4,  49, 27, 42, 12, 35, 20, 57, 50, 28,
+  5,  43, 13, 36, 58, 51, 21, 44, 6,  29, 59, 37, 14, 52, 22, 7,
+  45, 60, 30, 15, 38, 53, 23, 46, 31, 61, 39, 54, 47, 62, 55, 63,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8[64]) = {
+  0,  1,  2,  8,  9,  3,  16, 10, 4,  17, 11, 24, 5,  18, 25, 12,
+  19, 26, 32, 6,  13, 20, 33, 27, 7,  34, 40, 21, 28, 41, 14, 35,
+  48, 42, 29, 36, 49, 22, 43, 15, 56, 37, 50, 44, 30, 57, 23, 51,
+  58, 45, 38, 52, 31, 59, 53, 46, 60, 39, 61, 47, 54, 55, 62, 63,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, default_scan_8x16[128]) = {
+  0,   1,   8,   2,   9,   16,  3,   10,  17,  24,  4,   11,  18,  25,  32,
+  5,   12,  19,  26,  33,  40,  6,   13,  20,  27,  34,  41,  48,  7,   14,
+  21,  28,  35,  42,  49,  56,  15,  22,  29,  36,  43,  50,  57,  64,  23,
+  30,  37,  44,  51,  58,  65,  72,  31,  38,  45,  52,  59,  66,  73,  80,
+  39,  46,  53,  60,  67,  74,  81,  88,  47,  54,  61,  68,  75,  82,  89,
+  96,  55,  62,  69,  76,  83,  90,  97,  104, 63,  70,  77,  84,  91,  98,
+  105, 112, 71,  78,  85,  92,  99,  106, 113, 120, 79,  86,  93,  100, 107,
+  114, 121, 87,  94,  101, 108, 115, 122, 95,  102, 109, 116, 123, 103, 110,
+  117, 124, 111, 118, 125, 119, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_16x8[128]) = {
+  0,   1,  16,  2,   17,  32,  3,  18, 33,  48,  4,   19,  34,  49,  64,  5,
+  20,  35, 50,  65,  80,  6,   21, 36, 51,  66,  81,  96,  7,   22,  37,  52,
+  67,  82, 97,  112, 8,   23,  38, 53, 68,  83,  98,  113, 9,   24,  39,  54,
+  69,  84, 99,  114, 10,  25,  40, 55, 70,  85,  100, 115, 11,  26,  41,  56,
+  71,  86, 101, 116, 12,  27,  42, 57, 72,  87,  102, 117, 13,  28,  43,  58,
+  73,  88, 103, 118, 14,  29,  44, 59, 74,  89,  104, 119, 15,  30,  45,  60,
+  75,  90, 105, 120, 31,  46,  61, 76, 91,  106, 121, 47,  62,  77,  92,  107,
+  122, 63, 78,  93,  108, 123, 79, 94, 109, 124, 95,  110, 125, 111, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x16[128]) = {
+  0, 8,  16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96,  104, 112, 120,
+  1, 9,  17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97,  105, 113, 121,
+  2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98,  106, 114, 122,
+  3, 11, 19, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99,  107, 115, 123,
+  4, 12, 20, 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124,
+  5, 13, 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125,
+  6, 14, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126,
+  7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x8[128]) = {
+  0,  16, 32, 48, 64, 80, 96,  112, 1,  17, 33, 49, 65, 81, 97,  113,
+  2,  18, 34, 50, 66, 82, 98,  114, 3,  19, 35, 51, 67, 83, 99,  115,
+  4,  20, 36, 52, 68, 84, 100, 116, 5,  21, 37, 53, 69, 85, 101, 117,
+  6,  22, 38, 54, 70, 86, 102, 118, 7,  23, 39, 55, 71, 87, 103, 119,
+  8,  24, 40, 56, 72, 88, 104, 120, 9,  25, 41, 57, 73, 89, 105, 121,
+  10, 26, 42, 58, 74, 90, 106, 122, 11, 27, 43, 59, 75, 91, 107, 123,
+  12, 28, 44, 60, 76, 92, 108, 124, 13, 29, 45, 61, 77, 93, 109, 125,
+  14, 30, 46, 62, 78, 94, 110, 126, 15, 31, 47, 63, 79, 95, 111, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x16[128]) = {
+  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,
+  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
+  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+  120, 121, 122, 123, 124, 125, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x8[128]) = {
+  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,
+  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
+  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+  120, 121, 122, 123, 124, 125, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_16x32[512]) = {
+  0,   1,   16,  2,   17,  32,  3,   18,  33,  48,  4,   19,  34,  49,  64,
+  5,   20,  35,  50,  65,  80,  6,   21,  36,  51,  66,  81,  96,  7,   22,
+  37,  52,  67,  82,  97,  112, 8,   23,  38,  53,  68,  83,  98,  113, 128,
+  9,   24,  39,  54,  69,  84,  99,  114, 129, 144, 10,  25,  40,  55,  70,
+  85,  100, 115, 130, 145, 160, 11,  26,  41,  56,  71,  86,  101, 116, 131,
+  146, 161, 176, 12,  27,  42,  57,  72,  87,  102, 117, 132, 147, 162, 177,
+  192, 13,  28,  43,  58,  73,  88,  103, 118, 133, 148, 163, 178, 193, 208,
+  14,  29,  44,  59,  74,  89,  104, 119, 134, 149, 164, 179, 194, 209, 224,
+  15,  30,  45,  60,  75,  90,  105, 120, 135, 150, 165, 180, 195, 210, 225,
+  240, 31,  46,  61,  76,  91,  106, 121, 136, 151, 166, 181, 196, 211, 226,
+  241, 256, 47,  62,  77,  92,  107, 122, 137, 152, 167, 182, 197, 212, 227,
+  242, 257, 272, 63,  78,  93,  108, 123, 138, 153, 168, 183, 198, 213, 228,
+  243, 258, 273, 288, 79,  94,  109, 124, 139, 154, 169, 184, 199, 214, 229,
+  244, 259, 274, 289, 304, 95,  110, 125, 140, 155, 170, 185, 200, 215, 230,
+  245, 260, 275, 290, 305, 320, 111, 126, 141, 156, 171, 186, 201, 216, 231,
+  246, 261, 276, 291, 306, 321, 336, 127, 142, 157, 172, 187, 202, 217, 232,
+  247, 262, 277, 292, 307, 322, 337, 352, 143, 158, 173, 188, 203, 218, 233,
+  248, 263, 278, 293, 308, 323, 338, 353, 368, 159, 174, 189, 204, 219, 234,
+  249, 264, 279, 294, 309, 324, 339, 354, 369, 384, 175, 190, 205, 220, 235,
+  250, 265, 280, 295, 310, 325, 340, 355, 370, 385, 400, 191, 206, 221, 236,
+  251, 266, 281, 296, 311, 326, 341, 356, 371, 386, 401, 416, 207, 222, 237,
+  252, 267, 282, 297, 312, 327, 342, 357, 372, 387, 402, 417, 432, 223, 238,
+  253, 268, 283, 298, 313, 328, 343, 358, 373, 388, 403, 418, 433, 448, 239,
+  254, 269, 284, 299, 314, 329, 344, 359, 374, 389, 404, 419, 434, 449, 464,
+  255, 270, 285, 300, 315, 330, 345, 360, 375, 390, 405, 420, 435, 450, 465,
+  480, 271, 286, 301, 316, 331, 346, 361, 376, 391, 406, 421, 436, 451, 466,
+  481, 496, 287, 302, 317, 332, 347, 362, 377, 392, 407, 422, 437, 452, 467,
+  482, 497, 303, 318, 333, 348, 363, 378, 393, 408, 423, 438, 453, 468, 483,
+  498, 319, 334, 349, 364, 379, 394, 409, 424, 439, 454, 469, 484, 499, 335,
+  350, 365, 380, 395, 410, 425, 440, 455, 470, 485, 500, 351, 366, 381, 396,
+  411, 426, 441, 456, 471, 486, 501, 367, 382, 397, 412, 427, 442, 457, 472,
+  487, 502, 383, 398, 413, 428, 443, 458, 473, 488, 503, 399, 414, 429, 444,
+  459, 474, 489, 504, 415, 430, 445, 460, 475, 490, 505, 431, 446, 461, 476,
+  491, 506, 447, 462, 477, 492, 507, 463, 478, 493, 508, 479, 494, 509, 495,
+  510, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_32x16[512]) = {
+  0,   1,   32,  2,   33,  64,  3,   34,  65,  96,  4,   35,  66,  97,  128,
+  5,   36,  67,  98,  129, 160, 6,   37,  68,  99,  130, 161, 192, 7,   38,
+  69,  100, 131, 162, 193, 224, 8,   39,  70,  101, 132, 163, 194, 225, 256,
+  9,   40,  71,  102, 133, 164, 195, 226, 257, 288, 10,  41,  72,  103, 134,
+  165, 196, 227, 258, 289, 320, 11,  42,  73,  104, 135, 166, 197, 228, 259,
+  290, 321, 352, 12,  43,  74,  105, 136, 167, 198, 229, 260, 291, 322, 353,
+  384, 13,  44,  75,  106, 137, 168, 199, 230, 261, 292, 323, 354, 385, 416,
+  14,  45,  76,  107, 138, 169, 200, 231, 262, 293, 324, 355, 386, 417, 448,
+  15,  46,  77,  108, 139, 170, 201, 232, 263, 294, 325, 356, 387, 418, 449,
+  480, 16,  47,  78,  109, 140, 171, 202, 233, 264, 295, 326, 357, 388, 419,
+  450, 481, 17,  48,  79,  110, 141, 172, 203, 234, 265, 296, 327, 358, 389,
+  420, 451, 482, 18,  49,  80,  111, 142, 173, 204, 235, 266, 297, 328, 359,
+  390, 421, 452, 483, 19,  50,  81,  112, 143, 174, 205, 236, 267, 298, 329,
+  360, 391, 422, 453, 484, 20,  51,  82,  113, 144, 175, 206, 237, 268, 299,
+  330, 361, 392, 423, 454, 485, 21,  52,  83,  114, 145, 176, 207, 238, 269,
+  300, 331, 362, 393, 424, 455, 486, 22,  53,  84,  115, 146, 177, 208, 239,
+  270, 301, 332, 363, 394, 425, 456, 487, 23,  54,  85,  116, 147, 178, 209,
+  240, 271, 302, 333, 364, 395, 426, 457, 488, 24,  55,  86,  117, 148, 179,
+  210, 241, 272, 303, 334, 365, 396, 427, 458, 489, 25,  56,  87,  118, 149,
+  180, 211, 242, 273, 304, 335, 366, 397, 428, 459, 490, 26,  57,  88,  119,
+  150, 181, 212, 243, 274, 305, 336, 367, 398, 429, 460, 491, 27,  58,  89,
+  120, 151, 182, 213, 244, 275, 306, 337, 368, 399, 430, 461, 492, 28,  59,
+  90,  121, 152, 183, 214, 245, 276, 307, 338, 369, 400, 431, 462, 493, 29,
+  60,  91,  122, 153, 184, 215, 246, 277, 308, 339, 370, 401, 432, 463, 494,
+  30,  61,  92,  123, 154, 185, 216, 247, 278, 309, 340, 371, 402, 433, 464,
+  495, 31,  62,  93,  124, 155, 186, 217, 248, 279, 310, 341, 372, 403, 434,
+  465, 496, 63,  94,  125, 156, 187, 218, 249, 280, 311, 342, 373, 404, 435,
+  466, 497, 95,  126, 157, 188, 219, 250, 281, 312, 343, 374, 405, 436, 467,
+  498, 127, 158, 189, 220, 251, 282, 313, 344, 375, 406, 437, 468, 499, 159,
+  190, 221, 252, 283, 314, 345, 376, 407, 438, 469, 500, 191, 222, 253, 284,
+  315, 346, 377, 408, 439, 470, 501, 223, 254, 285, 316, 347, 378, 409, 440,
+  471, 502, 255, 286, 317, 348, 379, 410, 441, 472, 503, 287, 318, 349, 380,
+  411, 442, 473, 504, 319, 350, 381, 412, 443, 474, 505, 351, 382, 413, 444,
+  475, 506, 383, 414, 445, 476, 507, 415, 446, 477, 508, 447, 478, 509, 479,
+  510, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x32[512]) = {
+  0,   16,  32,  48,  64,  80,  96,  112, 128, 144, 160, 176, 192, 208, 224,
+  240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464,
+  480, 496, 1,   17,  33,  49,  65,  81,  97,  113, 129, 145, 161, 177, 193,
+  209, 225, 241, 257, 273, 289, 305, 321, 337, 353, 369, 385, 401, 417, 433,
+  449, 465, 481, 497, 2,   18,  34,  50,  66,  82,  98,  114, 130, 146, 162,
+  178, 194, 210, 226, 242, 258, 274, 290, 306, 322, 338, 354, 370, 386, 402,
+  418, 434, 450, 466, 482, 498, 3,   19,  35,  51,  67,  83,  99,  115, 131,
+  147, 163, 179, 195, 211, 227, 243, 259, 275, 291, 307, 323, 339, 355, 371,
+  387, 403, 419, 435, 451, 467, 483, 499, 4,   20,  36,  52,  68,  84,  100,
+  116, 132, 148, 164, 180, 196, 212, 228, 244, 260, 276, 292, 308, 324, 340,
+  356, 372, 388, 404, 420, 436, 452, 468, 484, 500, 5,   21,  37,  53,  69,
+  85,  101, 117, 133, 149, 165, 181, 197, 213, 229, 245, 261, 277, 293, 309,
+  325, 341, 357, 373, 389, 405, 421, 437, 453, 469, 485, 501, 6,   22,  38,
+  54,  70,  86,  102, 118, 134, 150, 166, 182, 198, 214, 230, 246, 262, 278,
+  294, 310, 326, 342, 358, 374, 390, 406, 422, 438, 454, 470, 486, 502, 7,
+  23,  39,  55,  71,  87,  103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
+  263, 279, 295, 311, 327, 343, 359, 375, 391, 407, 423, 439, 455, 471, 487,
+  503, 8,   24,  40,  56,  72,  88,  104, 120, 136, 152, 168, 184, 200, 216,
+  232, 248, 264, 280, 296, 312, 328, 344, 360, 376, 392, 408, 424, 440, 456,
+  472, 488, 504, 9,   25,  41,  57,  73,  89,  105, 121, 137, 153, 169, 185,
+  201, 217, 233, 249, 265, 281, 297, 313, 329, 345, 361, 377, 393, 409, 425,
+  441, 457, 473, 489, 505, 10,  26,  42,  58,  74,  90,  106, 122, 138, 154,
+  170, 186, 202, 218, 234, 250, 266, 282, 298, 314, 330, 346, 362, 378, 394,
+  410, 426, 442, 458, 474, 490, 506, 11,  27,  43,  59,  75,  91,  107, 123,
+  139, 155, 171, 187, 203, 219, 235, 251, 267, 283, 299, 315, 331, 347, 363,
+  379, 395, 411, 427, 443, 459, 475, 491, 507, 12,  28,  44,  60,  76,  92,
+  108, 124, 140, 156, 172, 188, 204, 220, 236, 252, 268, 284, 300, 316, 332,
+  348, 364, 380, 396, 412, 428, 444, 460, 476, 492, 508, 13,  29,  45,  61,
+  77,  93,  109, 125, 141, 157, 173, 189, 205, 221, 237, 253, 269, 285, 301,
+  317, 333, 349, 365, 381, 397, 413, 429, 445, 461, 477, 493, 509, 14,  30,
+  46,  62,  78,  94,  110, 126, 142, 158, 174, 190, 206, 222, 238, 254, 270,
+  286, 302, 318, 334, 350, 366, 382, 398, 414, 430, 446, 462, 478, 494, 510,
+  15,  31,  47,  63,  79,  95,  111, 127, 143, 159, 175, 191, 207, 223, 239,
+  255, 271, 287, 303, 319, 335, 351, 367, 383, 399, 415, 431, 447, 463, 479,
+  495, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x16[512]) = {
+  0,  32, 64, 96,  128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480,
+  1,  33, 65, 97,  129, 161, 193, 225, 257, 289, 321, 353, 385, 417, 449, 481,
+  2,  34, 66, 98,  130, 162, 194, 226, 258, 290, 322, 354, 386, 418, 450, 482,
+  3,  35, 67, 99,  131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483,
+  4,  36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, 388, 420, 452, 484,
+  5,  37, 69, 101, 133, 165, 197, 229, 261, 293, 325, 357, 389, 421, 453, 485,
+  6,  38, 70, 102, 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486,
+  7,  39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, 455, 487,
+  8,  40, 72, 104, 136, 168, 200, 232, 264, 296, 328, 360, 392, 424, 456, 488,
+  9,  41, 73, 105, 137, 169, 201, 233, 265, 297, 329, 361, 393, 425, 457, 489,
+  10, 42, 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490,
+  11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, 395, 427, 459, 491,
+  12, 44, 76, 108, 140, 172, 204, 236, 268, 300, 332, 364, 396, 428, 460, 492,
+  13, 45, 77, 109, 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493,
+  14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, 462, 494,
+  15, 47, 79, 111, 143, 175, 207, 239, 271, 303, 335, 367, 399, 431, 463, 495,
+  16, 48, 80, 112, 144, 176, 208, 240, 272, 304, 336, 368, 400, 432, 464, 496,
+  17, 49, 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497,
+  18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, 402, 434, 466, 498,
+  19, 51, 83, 115, 147, 179, 211, 243, 275, 307, 339, 371, 403, 435, 467, 499,
+  20, 52, 84, 116, 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500,
+  21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, 469, 501,
+  22, 54, 86, 118, 150, 182, 214, 246, 278, 310, 342, 374, 406, 438, 470, 502,
+  23, 55, 87, 119, 151, 183, 215, 247, 279, 311, 343, 375, 407, 439, 471, 503,
+  24, 56, 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504,
+  25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, 409, 441, 473, 505,
+  26, 58, 90, 122, 154, 186, 218, 250, 282, 314, 346, 378, 410, 442, 474, 506,
+  27, 59, 91, 123, 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507,
+  28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, 476, 508,
+  29, 61, 93, 125, 157, 189, 221, 253, 285, 317, 349, 381, 413, 445, 477, 509,
+  30, 62, 94, 126, 158, 190, 222, 254, 286, 318, 350, 382, 414, 446, 478, 510,
+  31, 63, 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x32[512]) = {
+  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,
+  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
+  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+  120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+  135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+  150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+  165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+  180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+  195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+  210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+  225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+  240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+  255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
+  270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
+  285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
+  300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
+  315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
+  330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
+  345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
+  360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
+  375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
+  390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
+  405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
+  420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
+  435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
+  450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
+  465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
+  480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
+  495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
+  510, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x16[512]) = {
+  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,
+  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
+  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+  120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+  135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+  150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+  165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+  180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+  195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+  210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+  225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+  240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+  255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
+  270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
+  285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
+  300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
+  315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
+  330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
+  345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
+  360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
+  375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
+  390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
+  405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
+  420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
+  435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
+  450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
+  465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
+  480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
+  495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
+  510, 511,
+};
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = {
+  0,   16,  1,   32,  17,  2,   48,  33,  18,  3,   64,  34,  49,  19,  65,
+  80,  50,  4,   35,  66,  20,  81,  96,  51,  5,   36,  82,  97,  67,  112,
+  21,  52,  98,  37,  83,  113, 6,   68,  128, 53,  22,  99,  114, 84,  7,
+  129, 38,  69,  100, 115, 144, 130, 85,  54,  23,  8,   145, 39,  70,  116,
+  101, 131, 160, 146, 55,  86,  24,  71,  132, 117, 161, 40,  9,   102, 147,
+  176, 162, 87,  56,  25,  133, 118, 177, 148, 72,  103, 41,  163, 10,  192,
+  178, 88,  57,  134, 149, 119, 26,  164, 73,  104, 193, 42,  179, 208, 11,
+  135, 89,  165, 120, 150, 58,  194, 180, 27,  74,  209, 105, 151, 136, 43,
+  90,  224, 166, 195, 181, 121, 210, 59,  12,  152, 106, 167, 196, 75,  137,
+  225, 211, 240, 182, 122, 91,  28,  197, 13,  226, 168, 183, 153, 44,  212,
+  138, 107, 241, 60,  29,  123, 198, 184, 227, 169, 242, 76,  213, 154, 45,
+  92,  14,  199, 139, 61,  228, 214, 170, 185, 243, 108, 77,  155, 30,  15,
+  200, 229, 124, 215, 244, 93,  46,  186, 171, 201, 109, 140, 230, 62,  216,
+  245, 31,  125, 78,  156, 231, 47,  187, 202, 217, 94,  246, 141, 63,  232,
+  172, 110, 247, 157, 79,  218, 203, 126, 233, 188, 248, 95,  173, 142, 219,
+  111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251,
+  190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239,
+  255,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x16[256]) = {
+  0,  16, 32, 48, 64, 80, 96,  112, 128, 144, 160, 176, 192, 208, 224, 240,
+  1,  17, 33, 49, 65, 81, 97,  113, 129, 145, 161, 177, 193, 209, 225, 241,
+  2,  18, 34, 50, 66, 82, 98,  114, 130, 146, 162, 178, 194, 210, 226, 242,
+  3,  19, 35, 51, 67, 83, 99,  115, 131, 147, 163, 179, 195, 211, 227, 243,
+  4,  20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244,
+  5,  21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245,
+  6,  22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246,
+  7,  23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
+  8,  24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248,
+  9,  25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249,
+  10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250,
+  11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251,
+  12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252,
+  13, 29, 45, 61, 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253,
+  14, 30, 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254,
+  15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x16[256]) = {
+  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,
+  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
+  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+  120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+  135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+  150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+  165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+  180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+  195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+  210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+  225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+  240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+  255,
+};
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16[256]) = {
+  0,   16,  32,  48,  1,   64,  17,  80,  33,  96,  49,  2,   65,  112, 18,
+  81,  34,  128, 50,  97,  3,   66,  144, 19,  113, 35,  82,  160, 98,  51,
+  129, 4,   67,  176, 20,  114, 145, 83,  36,  99,  130, 52,  192, 5,   161,
+  68,  115, 21,  146, 84,  208, 177, 37,  131, 100, 53,  162, 224, 69,  6,
+  116, 193, 147, 85,  22,  240, 132, 38,  178, 101, 163, 54,  209, 117, 70,
+  7,   148, 194, 86,  179, 225, 23,  133, 39,  164, 8,   102, 210, 241, 55,
+  195, 118, 149, 71,  180, 24,  87,  226, 134, 165, 211, 40,  103, 56,  72,
+  150, 196, 242, 119, 9,   181, 227, 88,  166, 25,  135, 41,  104, 212, 57,
+  151, 197, 120, 73,  243, 182, 136, 167, 213, 89,  10,  228, 105, 152, 198,
+  26,  42,  121, 183, 244, 168, 58,  137, 229, 74,  214, 90,  153, 199, 184,
+  11,  106, 245, 27,  122, 230, 169, 43,  215, 59,  200, 138, 185, 246, 75,
+  12,  91,  154, 216, 231, 107, 28,  44,  201, 123, 170, 60,  247, 232, 76,
+  139, 13,  92,  217, 186, 248, 155, 108, 29,  124, 45,  202, 233, 171, 61,
+  14,  77,  140, 15,  249, 93,  30,  187, 156, 218, 46,  109, 125, 62,  172,
+  78,  203, 31,  141, 234, 94,  47,  188, 63,  157, 110, 250, 219, 79,  126,
+  204, 173, 142, 95,  189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236,
+  159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239,
+  255,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16[256]) = {
+  0,   1,   2,   16,  3,   17,  4,   18,  32,  5,   33,  19,  6,   34,  48,
+  20,  49,  7,   35,  21,  50,  64,  8,   36,  65,  22,  51,  37,  80,  9,
+  66,  52,  23,  38,  81,  67,  10,  53,  24,  82,  68,  96,  39,  11,  54,
+  83,  97,  69,  25,  98,  84,  40,  112, 55,  12,  70,  99,  113, 85,  26,
+  41,  56,  114, 100, 13,  71,  128, 86,  27,  115, 101, 129, 42,  57,  72,
+  116, 14,  87,  130, 102, 144, 73,  131, 117, 28,  58,  15,  88,  43,  145,
+  103, 132, 146, 118, 74,  160, 89,  133, 104, 29,  59,  147, 119, 44,  161,
+  148, 90,  105, 134, 162, 120, 176, 75,  135, 149, 30,  60,  163, 177, 45,
+  121, 91,  106, 164, 178, 150, 192, 136, 165, 179, 31,  151, 193, 76,  122,
+  61,  137, 194, 107, 152, 180, 208, 46,  166, 167, 195, 92,  181, 138, 209,
+  123, 153, 224, 196, 77,  168, 210, 182, 240, 108, 197, 62,  154, 225, 183,
+  169, 211, 47,  139, 93,  184, 226, 212, 241, 198, 170, 124, 155, 199, 78,
+  213, 185, 109, 227, 200, 63,  228, 242, 140, 214, 171, 186, 156, 229, 243,
+  125, 94,  201, 244, 215, 216, 230, 141, 187, 202, 79,  172, 110, 157, 245,
+  217, 231, 95,  246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158, 188,
+  248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175,
+  190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254,
+  255,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x32[1024]) = {
+  0,   32,   64,  96,   128, 160,  192, 224,  256, 288,  320, 352,  384, 416,
+  448, 480,  512, 544,  576, 608,  640, 672,  704, 736,  768, 800,  832, 864,
+  896, 928,  960, 992,  1,   33,   65,  97,   129, 161,  193, 225,  257, 289,
+  321, 353,  385, 417,  449, 481,  513, 545,  577, 609,  641, 673,  705, 737,
+  769, 801,  833, 865,  897, 929,  961, 993,  2,   34,   66,  98,   130, 162,
+  194, 226,  258, 290,  322, 354,  386, 418,  450, 482,  514, 546,  578, 610,
+  642, 674,  706, 738,  770, 802,  834, 866,  898, 930,  962, 994,  3,   35,
+  67,  99,   131, 163,  195, 227,  259, 291,  323, 355,  387, 419,  451, 483,
+  515, 547,  579, 611,  643, 675,  707, 739,  771, 803,  835, 867,  899, 931,
+  963, 995,  4,   36,   68,  100,  132, 164,  196, 228,  260, 292,  324, 356,
+  388, 420,  452, 484,  516, 548,  580, 612,  644, 676,  708, 740,  772, 804,
+  836, 868,  900, 932,  964, 996,  5,   37,   69,  101,  133, 165,  197, 229,
+  261, 293,  325, 357,  389, 421,  453, 485,  517, 549,  581, 613,  645, 677,
+  709, 741,  773, 805,  837, 869,  901, 933,  965, 997,  6,   38,   70,  102,
+  134, 166,  198, 230,  262, 294,  326, 358,  390, 422,  454, 486,  518, 550,
+  582, 614,  646, 678,  710, 742,  774, 806,  838, 870,  902, 934,  966, 998,
+  7,   39,   71,  103,  135, 167,  199, 231,  263, 295,  327, 359,  391, 423,
+  455, 487,  519, 551,  583, 615,  647, 679,  711, 743,  775, 807,  839, 871,
+  903, 935,  967, 999,  8,   40,   72,  104,  136, 168,  200, 232,  264, 296,
+  328, 360,  392, 424,  456, 488,  520, 552,  584, 616,  648, 680,  712, 744,
+  776, 808,  840, 872,  904, 936,  968, 1000, 9,   41,   73,  105,  137, 169,
+  201, 233,  265, 297,  329, 361,  393, 425,  457, 489,  521, 553,  585, 617,
+  649, 681,  713, 745,  777, 809,  841, 873,  905, 937,  969, 1001, 10,  42,
+  74,  106,  138, 170,  202, 234,  266, 298,  330, 362,  394, 426,  458, 490,
+  522, 554,  586, 618,  650, 682,  714, 746,  778, 810,  842, 874,  906, 938,
+  970, 1002, 11,  43,   75,  107,  139, 171,  203, 235,  267, 299,  331, 363,
+  395, 427,  459, 491,  523, 555,  587, 619,  651, 683,  715, 747,  779, 811,
+  843, 875,  907, 939,  971, 1003, 12,  44,   76,  108,  140, 172,  204, 236,
+  268, 300,  332, 364,  396, 428,  460, 492,  524, 556,  588, 620,  652, 684,
+  716, 748,  780, 812,  844, 876,  908, 940,  972, 1004, 13,  45,   77,  109,
+  141, 173,  205, 237,  269, 301,  333, 365,  397, 429,  461, 493,  525, 557,
+  589, 621,  653, 685,  717, 749,  781, 813,  845, 877,  909, 941,  973, 1005,
+  14,  46,   78,  110,  142, 174,  206, 238,  270, 302,  334, 366,  398, 430,
+  462, 494,  526, 558,  590, 622,  654, 686,  718, 750,  782, 814,  846, 878,
+  910, 942,  974, 1006, 15,  47,   79,  111,  143, 175,  207, 239,  271, 303,
+  335, 367,  399, 431,  463, 495,  527, 559,  591, 623,  655, 687,  719, 751,
+  783, 815,  847, 879,  911, 943,  975, 1007, 16,  48,   80,  112,  144, 176,
+  208, 240,  272, 304,  336, 368,  400, 432,  464, 496,  528, 560,  592, 624,
+  656, 688,  720, 752,  784, 816,  848, 880,  912, 944,  976, 1008, 17,  49,
+  81,  113,  145, 177,  209, 241,  273, 305,  337, 369,  401, 433,  465, 497,
+  529, 561,  593, 625,  657, 689,  721, 753,  785, 817,  849, 881,  913, 945,
+  977, 1009, 18,  50,   82,  114,  146, 178,  210, 242,  274, 306,  338, 370,
+  402, 434,  466, 498,  530, 562,  594, 626,  658, 690,  722, 754,  786, 818,
+  850, 882,  914, 946,  978, 1010, 19,  51,   83,  115,  147, 179,  211, 243,
+  275, 307,  339, 371,  403, 435,  467, 499,  531, 563,  595, 627,  659, 691,
+  723, 755,  787, 819,  851, 883,  915, 947,  979, 1011, 20,  52,   84,  116,
+  148, 180,  212, 244,  276, 308,  340, 372,  404, 436,  468, 500,  532, 564,
+  596, 628,  660, 692,  724, 756,  788, 820,  852, 884,  916, 948,  980, 1012,
+  21,  53,   85,  117,  149, 181,  213, 245,  277, 309,  341, 373,  405, 437,
+  469, 501,  533, 565,  597, 629,  661, 693,  725, 757,  789, 821,  853, 885,
+  917, 949,  981, 1013, 22,  54,   86,  118,  150, 182,  214, 246,  278, 310,
+  342, 374,  406, 438,  470, 502,  534, 566,  598, 630,  662, 694,  726, 758,
+  790, 822,  854, 886,  918, 950,  982, 1014, 23,  55,   87,  119,  151, 183,
+  215, 247,  279, 311,  343, 375,  407, 439,  471, 503,  535, 567,  599, 631,
+  663, 695,  727, 759,  791, 823,  855, 887,  919, 951,  983, 1015, 24,  56,
+  88,  120,  152, 184,  216, 248,  280, 312,  344, 376,  408, 440,  472, 504,
+  536, 568,  600, 632,  664, 696,  728, 760,  792, 824,  856, 888,  920, 952,
+  984, 1016, 25,  57,   89,  121,  153, 185,  217, 249,  281, 313,  345, 377,
+  409, 441,  473, 505,  537, 569,  601, 633,  665, 697,  729, 761,  793, 825,
+  857, 889,  921, 953,  985, 1017, 26,  58,   90,  122,  154, 186,  218, 250,
+  282, 314,  346, 378,  410, 442,  474, 506,  538, 570,  602, 634,  666, 698,
+  730, 762,  794, 826,  858, 890,  922, 954,  986, 1018, 27,  59,   91,  123,
+  155, 187,  219, 251,  283, 315,  347, 379,  411, 443,  475, 507,  539, 571,
+  603, 635,  667, 699,  731, 763,  795, 827,  859, 891,  923, 955,  987, 1019,
+  28,  60,   92,  124,  156, 188,  220, 252,  284, 316,  348, 380,  412, 444,
+  476, 508,  540, 572,  604, 636,  668, 700,  732, 764,  796, 828,  860, 892,
+  924, 956,  988, 1020, 29,  61,   93,  125,  157, 189,  221, 253,  285, 317,
+  349, 381,  413, 445,  477, 509,  541, 573,  605, 637,  669, 701,  733, 765,
+  797, 829,  861, 893,  925, 957,  989, 1021, 30,  62,   94,  126,  158, 190,
+  222, 254,  286, 318,  350, 382,  414, 446,  478, 510,  542, 574,  606, 638,
+  670, 702,  734, 766,  798, 830,  862, 894,  926, 958,  990, 1022, 31,  63,
+  95,  127,  159, 191,  223, 255,  287, 319,  351, 383,  415, 447,  479, 511,
+  543, 575,  607, 639,  671, 703,  735, 767,  799, 831,  863, 895,  927, 959,
+  991, 1023,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x32[1024]) = {
+  0,    1,    2,    3,    4,    5,    6,    7,    8,    9,    10,   11,   12,
+  13,   14,   15,   16,   17,   18,   19,   20,   21,   22,   23,   24,   25,
+  26,   27,   28,   29,   30,   31,   32,   33,   34,   35,   36,   37,   38,
+  39,   40,   41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51,
+  52,   53,   54,   55,   56,   57,   58,   59,   60,   61,   62,   63,   64,
+  65,   66,   67,   68,   69,   70,   71,   72,   73,   74,   75,   76,   77,
+  78,   79,   80,   81,   82,   83,   84,   85,   86,   87,   88,   89,   90,
+  91,   92,   93,   94,   95,   96,   97,   98,   99,   100,  101,  102,  103,
+  104,  105,  106,  107,  108,  109,  110,  111,  112,  113,  114,  115,  116,
+  117,  118,  119,  120,  121,  122,  123,  124,  125,  126,  127,  128,  129,
+  130,  131,  132,  133,  134,  135,  136,  137,  138,  139,  140,  141,  142,
+  143,  144,  145,  146,  147,  148,  149,  150,  151,  152,  153,  154,  155,
+  156,  157,  158,  159,  160,  161,  162,  163,  164,  165,  166,  167,  168,
+  169,  170,  171,  172,  173,  174,  175,  176,  177,  178,  179,  180,  181,
+  182,  183,  184,  185,  186,  187,  188,  189,  190,  191,  192,  193,  194,
+  195,  196,  197,  198,  199,  200,  201,  202,  203,  204,  205,  206,  207,
+  208,  209,  210,  211,  212,  213,  214,  215,  216,  217,  218,  219,  220,
+  221,  222,  223,  224,  225,  226,  227,  228,  229,  230,  231,  232,  233,
+  234,  235,  236,  237,  238,  239,  240,  241,  242,  243,  244,  245,  246,
+  247,  248,  249,  250,  251,  252,  253,  254,  255,  256,  257,  258,  259,
+  260,  261,  262,  263,  264,  265,  266,  267,  268,  269,  270,  271,  272,
+  273,  274,  275,  276,  277,  278,  279,  280,  281,  282,  283,  284,  285,
+  286,  287,  288,  289,  290,  291,  292,  293,  294,  295,  296,  297,  298,
+  299,  300,  301,  302,  303,  304,  305,  306,  307,  308,  309,  310,  311,
+  312,  313,  314,  315,  316,  317,  318,  319,  320,  321,  322,  323,  324,
+  325,  326,  327,  328,  329,  330,  331,  332,  333,  334,  335,  336,  337,
+  338,  339,  340,  341,  342,  343,  344,  345,  346,  347,  348,  349,  350,
+  351,  352,  353,  354,  355,  356,  357,  358,  359,  360,  361,  362,  363,
+  364,  365,  366,  367,  368,  369,  370,  371,  372,  373,  374,  375,  376,
+  377,  378,  379,  380,  381,  382,  383,  384,  385,  386,  387,  388,  389,
+  390,  391,  392,  393,  394,  395,  396,  397,  398,  399,  400,  401,  402,
+  403,  404,  405,  406,  407,  408,  409,  410,  411,  412,  413,  414,  415,
+  416,  417,  418,  419,  420,  421,  422,  423,  424,  425,  426,  427,  428,
+  429,  430,  431,  432,  433,  434,  435,  436,  437,  438,  439,  440,  441,
+  442,  443,  444,  445,  446,  447,  448,  449,  450,  451,  452,  453,  454,
+  455,  456,  457,  458,  459,  460,  461,  462,  463,  464,  465,  466,  467,
+  468,  469,  470,  471,  472,  473,  474,  475,  476,  477,  478,  479,  480,
+  481,  482,  483,  484,  485,  486,  487,  488,  489,  490,  491,  492,  493,
+  494,  495,  496,  497,  498,  499,  500,  501,  502,  503,  504,  505,  506,
+  507,  508,  509,  510,  511,  512,  513,  514,  515,  516,  517,  518,  519,
+  520,  521,  522,  523,  524,  525,  526,  527,  528,  529,  530,  531,  532,
+  533,  534,  535,  536,  537,  538,  539,  540,  541,  542,  543,  544,  545,
+  546,  547,  548,  549,  550,  551,  552,  553,  554,  555,  556,  557,  558,
+  559,  560,  561,  562,  563,  564,  565,  566,  567,  568,  569,  570,  571,
+  572,  573,  574,  575,  576,  577,  578,  579,  580,  581,  582,  583,  584,
+  585,  586,  587,  588,  589,  590,  591,  592,  593,  594,  595,  596,  597,
+  598,  599,  600,  601,  602,  603,  604,  605,  606,  607,  608,  609,  610,
+  611,  612,  613,  614,  615,  616,  617,  618,  619,  620,  621,  622,  623,
+  624,  625,  626,  627,  628,  629,  630,  631,  632,  633,  634,  635,  636,
+  637,  638,  639,  640,  641,  642,  643,  644,  645,  646,  647,  648,  649,
+  650,  651,  652,  653,  654,  655,  656,  657,  658,  659,  660,  661,  662,
+  663,  664,  665,  666,  667,  668,  669,  670,  671,  672,  673,  674,  675,
+  676,  677,  678,  679,  680,  681,  682,  683,  684,  685,  686,  687,  688,
+  689,  690,  691,  692,  693,  694,  695,  696,  697,  698,  699,  700,  701,
+  702,  703,  704,  705,  706,  707,  708,  709,  710,  711,  712,  713,  714,
+  715,  716,  717,  718,  719,  720,  721,  722,  723,  724,  725,  726,  727,
+  728,  729,  730,  731,  732,  733,  734,  735,  736,  737,  738,  739,  740,
+  741,  742,  743,  744,  745,  746,  747,  748,  749,  750,  751,  752,  753,
+  754,  755,  756,  757,  758,  759,  760,  761,  762,  763,  764,  765,  766,
+  767,  768,  769,  770,  771,  772,  773,  774,  775,  776,  777,  778,  779,
+  780,  781,  782,  783,  784,  785,  786,  787,  788,  789,  790,  791,  792,
+  793,  794,  795,  796,  797,  798,  799,  800,  801,  802,  803,  804,  805,
+  806,  807,  808,  809,  810,  811,  812,  813,  814,  815,  816,  817,  818,
+  819,  820,  821,  822,  823,  824,  825,  826,  827,  828,  829,  830,  831,
+  832,  833,  834,  835,  836,  837,  838,  839,  840,  841,  842,  843,  844,
+  845,  846,  847,  848,  849,  850,  851,  852,  853,  854,  855,  856,  857,
+  858,  859,  860,  861,  862,  863,  864,  865,  866,  867,  868,  869,  870,
+  871,  872,  873,  874,  875,  876,  877,  878,  879,  880,  881,  882,  883,
+  884,  885,  886,  887,  888,  889,  890,  891,  892,  893,  894,  895,  896,
+  897,  898,  899,  900,  901,  902,  903,  904,  905,  906,  907,  908,  909,
+  910,  911,  912,  913,  914,  915,  916,  917,  918,  919,  920,  921,  922,
+  923,  924,  925,  926,  927,  928,  929,  930,  931,  932,  933,  934,  935,
+  936,  937,  938,  939,  940,  941,  942,  943,  944,  945,  946,  947,  948,
+  949,  950,  951,  952,  953,  954,  955,  956,  957,  958,  959,  960,  961,
+  962,  963,  964,  965,  966,  967,  968,  969,  970,  971,  972,  973,  974,
+  975,  976,  977,  978,  979,  980,  981,  982,  983,  984,  985,  986,  987,
+  988,  989,  990,  991,  992,  993,  994,  995,  996,  997,  998,  999,  1000,
+  1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013,
+  1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
+};
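+
+// Note: mrow_scan_32x32 above is simply the identity permutation
+// (raster order); the matching mcol scan is its column-major
+// counterpart. A sketch of how such tables can be generated
+// (illustrative code only, not part of this change):
+//
+//   int16_t mrow[32 * 32], mcol[32 * 32];
+//   for (int r = 0; r < 32; ++r)
+//     for (int c = 0; c < 32; ++c) {
+//       mrow[r * 32 + c] = (int16_t)(r * 32 + c);  // rows left-to-right
+//       mcol[c * 32 + r] = (int16_t)(r * 32 + c);  // columns top-to-bottom
+//     }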
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = {
+  0,    32,   1,    64,  33,   2,    96,   65,   34,   128,  3,    97,   66,
+  160,  129,  35,   98,  4,    67,   130,  161,  192,  36,   99,   224,  5,
+  162,  193,  68,   131, 37,   100,  225,  194,  256,  163,  69,   132,  6,
+  226,  257,  288,  195, 101,  164,  38,   258,  7,    227,  289,  133,  320,
+  70,   196,  165,  290, 259,  228,  39,   321,  102,  352,  8,    197,  71,
+  134,  322,  291,  260, 353,  384,  229,  166,  103,  40,   354,  323,  292,
+  135,  385,  198,  261, 72,   9,    416,  167,  386,  355,  230,  324,  104,
+  293,  41,   417,  199, 136,  262,  387,  448,  325,  356,  10,   73,   418,
+  231,  168,  449,  294, 388,  105,  419,  263,  42,   200,  357,  450,  137,
+  480,  74,   326,  232, 11,   389,  169,  295,  420,  106,  451,  481,  358,
+  264,  327,  201,  43,  138,  512,  482,  390,  296,  233,  170,  421,  75,
+  452,  359,  12,   513, 265,  483,  328,  107,  202,  514,  544,  422,  391,
+  453,  139,  44,   234, 484,  297,  360,  171,  76,   515,  545,  266,  329,
+  454,  13,   423,  203, 108,  546,  485,  576,  298,  235,  140,  361,  330,
+  172,  547,  45,   455, 267,  577,  486,  77,   204,  362,  608,  14,   299,
+  578,  109,  236,  487, 609,  331,  141,  579,  46,   15,   173,  610,  363,
+  78,   205,  16,   110, 237,  611,  142,  47,   174,  79,   206,  17,   111,
+  238,  48,   143,  80,  175,  112,  207,  49,   18,   239,  81,   113,  19,
+  50,   82,   114,  51,  83,   115,  640,  516,  392,  268,  144,  20,   672,
+  641,  548,  517,  424, 393,  300,  269,  176,  145,  52,   21,   704,  673,
+  642,  580,  549,  518, 456,  425,  394,  332,  301,  270,  208,  177,  146,
+  84,   53,   22,   736, 705,  674,  643,  612,  581,  550,  519,  488,  457,
+  426,  395,  364,  333, 302,  271,  240,  209,  178,  147,  116,  85,   54,
+  23,   737,  706,  675, 613,  582,  551,  489,  458,  427,  365,  334,  303,
+  241,  210,  179,  117, 86,   55,   738,  707,  614,  583,  490,  459,  366,
+  335,  242,  211,  118, 87,   739,  615,  491,  367,  243,  119,  768,  644,
+  520,  396,  272,  148, 24,   800,  769,  676,  645,  552,  521,  428,  397,
+  304,  273,  180,  149, 56,   25,   832,  801,  770,  708,  677,  646,  584,
+  553,  522,  460,  429, 398,  336,  305,  274,  212,  181,  150,  88,   57,
+  26,   864,  833,  802, 771,  740,  709,  678,  647,  616,  585,  554,  523,
+  492,  461,  430,  399, 368,  337,  306,  275,  244,  213,  182,  151,  120,
+  89,   58,   27,   865, 834,  803,  741,  710,  679,  617,  586,  555,  493,
+  462,  431,  369,  338, 307,  245,  214,  183,  121,  90,   59,   866,  835,
+  742,  711,  618,  587, 494,  463,  370,  339,  246,  215,  122,  91,   867,
+  743,  619,  495,  371, 247,  123,  896,  772,  648,  524,  400,  276,  152,
+  28,   928,  897,  804, 773,  680,  649,  556,  525,  432,  401,  308,  277,
+  184,  153,  60,   29,  960,  929,  898,  836,  805,  774,  712,  681,  650,
+  588,  557,  526,  464, 433,  402,  340,  309,  278,  216,  185,  154,  92,
+  61,   30,   992,  961, 930,  899,  868,  837,  806,  775,  744,  713,  682,
+  651,  620,  589,  558, 527,  496,  465,  434,  403,  372,  341,  310,  279,
+  248,  217,  186,  155, 124,  93,   62,   31,   993,  962,  931,  869,  838,
+  807,  745,  714,  683, 621,  590,  559,  497,  466,  435,  373,  342,  311,
+  249,  218,  187,  125, 94,   63,   994,  963,  870,  839,  746,  715,  622,
+  591,  498,  467,  374, 343,  250,  219,  126,  95,   995,  871,  747,  623,
+  499,  375,  251,  127, 900,  776,  652,  528,  404,  280,  156,  932,  901,
+  808,  777,  684,  653, 560,  529,  436,  405,  312,  281,  188,  157,  964,
+  933,  902,  840,  809, 778,  716,  685,  654,  592,  561,  530,  468,  437,
+  406,  344,  313,  282, 220,  189,  158,  996,  965,  934,  903,  872,  841,
+  810,  779,  748,  717, 686,  655,  624,  593,  562,  531,  500,  469,  438,
+  407,  376,  345,  314, 283,  252,  221,  190,  159,  997,  966,  935,  873,
+  842,  811,  749,  718, 687,  625,  594,  563,  501,  470,  439,  377,  346,
+  315,  253,  222,  191, 998,  967,  874,  843,  750,  719,  626,  595,  502,
+  471,  378,  347,  254, 223,  999,  875,  751,  627,  503,  379,  255,  904,
+  780,  656,  532,  408, 284,  936,  905,  812,  781,  688,  657,  564,  533,
+  440,  409,  316,  285, 968,  937,  906,  844,  813,  782,  720,  689,  658,
+  596,  565,  534,  472, 441,  410,  348,  317,  286,  1000, 969,  938,  907,
+  876,  845,  814,  783, 752,  721,  690,  659,  628,  597,  566,  535,  504,
+  473,  442,  411,  380, 349,  318,  287,  1001, 970,  939,  877,  846,  815,
+  753,  722,  691,  629, 598,  567,  505,  474,  443,  381,  350,  319,  1002,
+  971,  878,  847,  754, 723,  630,  599,  506,  475,  382,  351,  1003, 879,
+  755,  631,  507,  383, 908,  784,  660,  536,  412,  940,  909,  816,  785,
+  692,  661,  568,  537, 444,  413,  972,  941,  910,  848,  817,  786,  724,
+  693,  662,  600,  569, 538,  476,  445,  414,  1004, 973,  942,  911,  880,
+  849,  818,  787,  756, 725,  694,  663,  632,  601,  570,  539,  508,  477,
+  446,  415,  1005, 974, 943,  881,  850,  819,  757,  726,  695,  633,  602,
+  571,  509,  478,  447, 1006, 975,  882,  851,  758,  727,  634,  603,  510,
+  479,  1007, 883,  759, 635,  511,  912,  788,  664,  540,  944,  913,  820,
+  789,  696,  665,  572, 541,  976,  945,  914,  852,  821,  790,  728,  697,
+  666,  604,  573,  542, 1008, 977,  946,  915,  884,  853,  822,  791,  760,
+  729,  698,  667,  636, 605,  574,  543,  1009, 978,  947,  885,  854,  823,
+  761,  730,  699,  637, 606,  575,  1010, 979,  886,  855,  762,  731,  638,
+  607,  1011, 887,  763, 639,  916,  792,  668,  948,  917,  824,  793,  700,
+  669,  980,  949,  918, 856,  825,  794,  732,  701,  670,  1012, 981,  950,
+  919,  888,  857,  826, 795,  764,  733,  702,  671,  1013, 982,  951,  889,
+  858,  827,  765,  734, 703,  1014, 983,  890,  859,  766,  735,  1015, 891,
+  767,  920,  796,  952, 921,  828,  797,  984,  953,  922,  860,  829,  798,
+  1016, 985,  954,  923, 892,  861,  830,  799,  1017, 986,  955,  893,  862,
+  831,  1018, 987,  894, 863,  1019, 895,  924,  956,  925,  988,  957,  926,
+  1020, 989,  958,  927, 1021, 990,  959,  1022, 991,  1023,
+};
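+
+// The default scan above is a hand-tuned, roughly anti-diagonal order.
+// A scan table maps a scan position to the raster index
+// (row * 32 + col) of the coefficient visited next; a minimal
+// consumption sketch, with illustrative identifiers only:
+//
+//   for (int i = 0; i < eob; ++i) {
+//     const int raster = default_scan_32x32[i];
+//     process_coeff(qcoeff[raster], i);  // hypothetical per-coeff hook
+//   }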
+
+#if CONFIG_EXT_TX
+// Scan over two rectangular vertical partitions (the left and right
+// halves of the block), one after the other
+DECLARE_ALIGNED(16, static const int16_t, v2_scan_32x32[1024]) = {
+  0,    1,    32,   33,   2,    64,   34,   65,   66,   3,    96,   35,   97,
+  67,   98,   4,    128,  36,   129,  99,   68,   130,  5,    100,  131,  160,
+  37,   161,  69,   162,  132,  101,  163,  6,    192,  38,   193,  70,   194,
+  133,  164,  102,  195,  7,    224,  39,   165,  225,  134,  196,  71,   226,
+  103,  227,  166,  197,  8,    256,  40,   135,  228,  257,  72,   258,  198,
+  104,  259,  167,  229,  136,  260,  9,    288,  41,   289,  73,   199,  230,
+  290,  168,  261,  105,  291,  137,  292,  231,  10,   200,  262,  320,  42,
+  321,  74,   322,  169,  293,  106,  323,  232,  263,  138,  324,  201,  294,
+  11,   352,  43,   353,  75,   170,  325,  354,  264,  107,  233,  295,  355,
+  202,  326,  139,  356,  12,   384,  44,   265,  296,  385,  171,  357,  76,
+  386,  234,  327,  108,  387,  203,  358,  140,  388,  297,  266,  328,  13,
+  172,  389,  416,  45,   235,  359,  417,  77,   418,  109,  419,  204,  390,
+  298,  329,  141,  267,  360,  420,  236,  391,  173,  421,  14,   448,  46,
+  449,  78,   330,  450,  299,  361,  110,  205,  422,  451,  268,  392,  142,
+  452,  237,  423,  174,  331,  362,  453,  15,   300,  393,  480,  47,   481,
+  79,   482,  206,  454,  269,  424,  111,  483,  143,  484,  363,  332,  394,
+  238,  455,  175,  301,  425,  485,  512,  513,  270,  456,  514,  207,  486,
+  364,  395,  515,  333,  426,  516,  239,  487,  302,  457,  517,  396,  271,
+  488,  544,  365,  427,  545,  518,  546,  334,  458,  547,  519,  548,  303,
+  489,  397,  428,  549,  366,  459,  520,  576,  335,  490,  550,  577,  578,
+  579,  521,  429,  551,  398,  460,  580,  367,  491,  581,  552,  522,  582,
+  608,  609,  430,  461,  610,  399,  492,  553,  611,  583,  523,  612,  613,
+  584,  554,  462,  431,  493,  614,  524,  640,  641,  642,  585,  643,  555,
+  615,  644,  463,  494,  586,  525,  616,  645,  556,  646,  672,  617,  673,
+  587,  674,  647,  495,  675,  526,  676,  557,  618,  648,  677,  588,  678,
+  527,  649,  619,  704,  558,  705,  706,  679,  589,  707,  650,  708,  620,
+  680,  709,  559,  590,  710,  651,  681,  736,  621,  737,  711,  738,  739,
+  682,  652,  740,  712,  591,  741,  622,  683,  713,  742,  653,  768,  769,
+  743,  770,  714,  684,  771,  623,  772,  744,  654,  773,  715,  685,  745,
+  774,  655,  775,  800,  801,  716,  746,  802,  803,  686,  776,  804,  747,
+  805,  717,  777,  806,  687,  748,  807,  778,  832,  833,  718,  834,  835,
+  808,  836,  779,  749,  837,  809,  719,  838,  780,  750,  810,  839,  864,
+  865,  866,  867,  840,  781,  868,  811,  751,  869,  841,  870,  812,  782,
+  842,  871,  896,  897,  898,  872,  899,  813,  843,  900,  783,  901,  873,
+  844,  902,  814,  874,  903,  928,  929,  845,  930,  904,  815,  875,  931,
+  932,  905,  933,  846,  876,  934,  906,  935,  877,  960,  847,  961,  962,
+  907,  936,  963,  964,  937,  878,  965,  908,  966,  938,  967,  909,  879,
+  992,  939,  993,  968,  994,  995,  996,  910,  969,  940,  997,  998,  970,
+  911,  941,  999,  971,  1000, 942,  1001, 972,  1002, 943,  973,  1003, 974,
+  1004, 975,  1005, 1006, 1007, 16,   48,   80,   112,  144,  176,  17,   49,
+  208,  81,   113,  145,  240,  177,  272,  18,   50,   209,  82,   114,  304,
+  241,  146,  178,  273,  336,  210,  19,   51,   83,   115,  305,  242,  147,
+  368,  179,  274,  337,  211,  20,   400,  52,   84,   306,  116,  243,  369,
+  148,  338,  180,  275,  432,  401,  212,  21,   53,   307,  85,   370,  244,
+  117,  464,  149,  433,  339,  276,  181,  402,  213,  308,  496,  371,  22,
+  54,   465,  86,   245,  118,  434,  150,  340,  277,  403,  182,  528,  497,
+  214,  466,  372,  309,  23,   55,   435,  87,   246,  119,  341,  404,  151,
+  529,  560,  278,  498,  183,  467,  373,  215,  310,  436,  24,   56,   247,
+  561,  88,   530,  592,  342,  120,  405,  499,  152,  279,  468,  184,  374,
+  311,  437,  216,  562,  593,  531,  624,  25,   248,  500,  57,   406,  89,
+  343,  121,  469,  280,  153,  594,  185,  375,  563,  625,  438,  532,  656,
+  312,  217,  501,  407,  249,  26,   344,  58,   90,   470,  122,  595,  626,
+  281,  564,  657,  154,  376,  533,  688,  439,  186,  313,  502,  218,  408,
+  627,  596,  658,  250,  345,  471,  27,   59,   565,  689,  91,   123,  282,
+  534,  720,  155,  440,  377,  187,  503,  314,  628,  659,  219,  597,  690,
+  409,  472,  566,  721,  346,  251,  28,   60,   535,  752,  92,   124,  283,
+  441,  378,  156,  660,  504,  629,  691,  598,  722,  188,  315,  567,  753,
+  220,  410,  473,  347,  536,  784,  252,  29,   661,  692,  61,   93,   442,
+  630,  723,  284,  125,  379,  505,  599,  754,  157,  316,  568,  785,  189,
+  474,  411,  221,  537,  816,  693,  348,  662,  724,  253,  631,  755,  443,
+  30,   600,  786,  62,   506,  94,   285,  380,  126,  569,  817,  158,  317,
+  190,  475,  694,  725,  412,  663,  756,  538,  848,  222,  632,  787,  349,
+  254,  601,  818,  444,  507,  31,   63,   381,  286,  95,   570,  849,  726,
+  127,  695,  757,  664,  788,  159,  476,  318,  413,  539,  880,  191,  633,
+  819,  223,  350,  602,  850,  508,  255,  445,  727,  758,  696,  789,  571,
+  881,  382,  287,  665,  820,  477,  634,  851,  540,  912,  319,  414,  603,
+  882,  759,  728,  790,  351,  509,  697,  821,  446,  572,  913,  666,  852,
+  383,  635,  883,  478,  541,  944,  415,  760,  791,  604,  914,  729,  822,
+  698,  853,  510,  667,  884,  447,  573,  945,  636,  915,  792,  761,  823,
+  542,  976,  479,  730,  854,  605,  946,  699,  885,  668,  916,  511,  574,
+  977,  793,  824,  637,  947,  762,  855,  731,  886,  543,  1008, 606,  978,
+  700,  917,  669,  948,  575,  825,  1009, 794,  856,  763,  887,  638,  979,
+  732,  918,  701,  949,  607,  1010, 670,  980,  826,  857,  795,  888,  764,
+  919,  639,  1011, 733,  950,  702,  981,  858,  827,  889,  796,  920,  671,
+  1012, 765,  951,  734,  982,  703,  1013, 859,  890,  828,  921,  797,  952,
+  766,  983,  735,  1014, 891,  860,  922,  829,  953,  798,  984,  767,  1015,
+  892,  923,  861,  954,  830,  985,  799,  1016, 924,  893,  955,  862,  986,
+  831,  1017, 925,  956,  894,  987,  863,  1018, 957,  926,  988,  895,  1019,
+  958,  989,  927,  1020, 990,  959,  1021, 991,  1022, 1023,
+};
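+
+// Layout note: entries 0..511 above all satisfy (index % 32) < 16,
+// i.e. they lie in the left half; entries 512..1023 cover the right
+// half.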
+
+// Scan over two rectangular horizontal partitions (the top and bottom
+// halves of the block), one after the other
+DECLARE_ALIGNED(16, static const int16_t, h2_scan_32x32[1024]) = {
+  0,    1,    32,   33,   2,    64,   34,   65,   66,   3,    96,   35,   97,
+  67,   98,   4,    128,  36,   129,  99,   68,   130,  5,    100,  131,  160,
+  37,   161,  69,   162,  132,  101,  163,  6,    192,  38,   193,  70,   194,
+  133,  164,  102,  195,  7,    224,  39,   165,  225,  134,  196,  71,   226,
+  103,  227,  166,  197,  8,    256,  40,   135,  228,  257,  72,   258,  198,
+  104,  259,  167,  229,  136,  260,  9,    288,  41,   289,  73,   199,  230,
+  290,  168,  261,  105,  291,  137,  292,  231,  10,   200,  262,  320,  42,
+  321,  74,   322,  169,  293,  106,  323,  232,  263,  138,  324,  201,  294,
+  11,   352,  43,   353,  75,   170,  325,  354,  264,  107,  233,  295,  355,
+  202,  326,  139,  356,  12,   384,  44,   265,  296,  385,  171,  357,  76,
+  386,  234,  327,  108,  387,  203,  358,  140,  388,  297,  266,  328,  13,
+  172,  389,  416,  45,   235,  359,  417,  77,   418,  109,  419,  204,  390,
+  298,  329,  141,  267,  360,  420,  236,  391,  173,  421,  14,   448,  46,
+  449,  78,   330,  450,  299,  361,  110,  205,  422,  451,  268,  392,  142,
+  452,  237,  423,  174,  331,  362,  453,  15,   300,  393,  480,  47,   481,
+  79,   482,  206,  454,  269,  424,  111,  483,  143,  484,  363,  332,  394,
+  238,  455,  175,  301,  425,  485,  16,   48,   80,   270,  456,  207,  486,
+  112,  364,  395,  333,  426,  144,  239,  487,  302,  457,  176,  396,  17,
+  271,  488,  49,   365,  427,  208,  81,   334,  458,  113,  145,  240,  303,
+  489,  397,  428,  177,  366,  459,  272,  18,   50,   209,  335,  490,  82,
+  114,  304,  241,  429,  146,  398,  460,  367,  491,  178,  273,  336,  210,
+  19,   51,   83,   430,  461,  399,  492,  115,  305,  242,  147,  368,  179,
+  274,  337,  462,  431,  493,  211,  20,   400,  52,   84,   306,  116,  243,
+  369,  148,  463,  494,  338,  180,  275,  432,  401,  212,  21,   53,   307,
+  85,   370,  244,  117,  495,  464,  149,  433,  339,  276,  181,  402,  213,
+  308,  496,  371,  22,   54,   465,  86,   245,  118,  434,  150,  340,  277,
+  403,  182,  497,  214,  466,  372,  309,  23,   55,   435,  87,   246,  119,
+  341,  404,  151,  278,  498,  183,  467,  373,  215,  310,  436,  24,   56,
+  247,  88,   342,  120,  405,  499,  152,  279,  468,  184,  374,  311,  437,
+  216,  25,   248,  500,  57,   406,  89,   343,  121,  469,  280,  153,  185,
+  375,  438,  312,  217,  501,  407,  249,  26,   344,  58,   90,   470,  122,
+  281,  154,  376,  439,  186,  313,  502,  218,  408,  250,  345,  471,  27,
+  59,   91,   123,  282,  155,  440,  377,  187,  503,  314,  219,  409,  472,
+  346,  251,  28,   60,   92,   124,  283,  441,  378,  156,  504,  188,  315,
+  220,  410,  473,  347,  252,  29,   61,   93,   442,  284,  125,  379,  505,
+  157,  316,  189,  474,  411,  221,  348,  253,  443,  30,   62,   506,  94,
+  285,  380,  126,  158,  317,  190,  475,  412,  222,  349,  254,  444,  507,
+  31,   63,   381,  286,  95,   127,  159,  476,  318,  413,  191,  223,  350,
+  508,  255,  445,  382,  287,  477,  319,  414,  351,  509,  446,  383,  478,
+  415,  510,  447,  479,  511,  512,  513,  514,  515,  516,  517,  544,  545,
+  518,  546,  547,  519,  548,  549,  520,  576,  550,  577,  578,  579,  521,
+  551,  580,  581,  552,  522,  582,  608,  609,  610,  553,  611,  583,  523,
+  612,  613,  584,  554,  614,  524,  640,  641,  642,  585,  643,  555,  615,
+  644,  586,  525,  616,  645,  556,  646,  672,  617,  673,  587,  674,  647,
+  675,  526,  676,  557,  618,  648,  677,  588,  678,  527,  649,  619,  704,
+  558,  705,  706,  679,  589,  707,  650,  708,  620,  680,  709,  528,  559,
+  590,  710,  651,  681,  736,  621,  737,  711,  738,  739,  682,  652,  529,
+  560,  740,  712,  591,  741,  622,  683,  713,  742,  653,  768,  769,  561,
+  743,  530,  592,  770,  714,  684,  771,  623,  772,  744,  654,  773,  715,
+  685,  745,  774,  562,  593,  531,  624,  655,  775,  800,  801,  716,  746,
+  802,  803,  686,  776,  804,  594,  563,  625,  747,  805,  717,  532,  656,
+  777,  806,  687,  748,  807,  778,  832,  833,  718,  834,  595,  626,  835,
+  564,  657,  808,  836,  533,  688,  779,  749,  837,  809,  719,  838,  780,
+  627,  596,  658,  750,  810,  839,  864,  565,  689,  865,  866,  867,  534,
+  720,  840,  781,  868,  811,  751,  869,  841,  628,  659,  597,  690,  870,
+  812,  782,  566,  721,  842,  871,  896,  535,  752,  897,  898,  872,  899,
+  813,  843,  660,  900,  783,  629,  691,  598,  722,  901,  873,  567,  753,
+  844,  902,  814,  874,  536,  784,  903,  661,  692,  928,  929,  630,  723,
+  845,  930,  904,  815,  875,  931,  599,  754,  932,  568,  785,  905,  933,
+  846,  876,  934,  537,  816,  693,  662,  724,  906,  631,  755,  935,  877,
+  600,  786,  960,  847,  961,  962,  907,  936,  963,  569,  817,  964,  937,
+  694,  725,  878,  965,  908,  663,  756,  538,  848,  966,  632,  787,  938,
+  601,  818,  967,  909,  879,  992,  939,  993,  968,  570,  849,  994,  726,
+  695,  757,  995,  664,  788,  996,  910,  969,  539,  880,  940,  633,  819,
+  997,  998,  602,  850,  970,  911,  941,  999,  727,  758,  696,  789,  571,
+  881,  971,  665,  820,  1000, 634,  851,  942,  540,  912,  1001, 972,  603,
+  882,  759,  728,  790,  1002, 697,  821,  943,  973,  572,  913,  666,  852,
+  1003, 635,  883,  974,  541,  944,  760,  791,  1004, 604,  914,  729,  822,
+  698,  853,  975,  667,  884,  573,  945,  1005, 636,  915,  792,  761,  823,
+  542,  976,  1006, 730,  854,  605,  946,  699,  885,  668,  916,  1007, 574,
+  977,  793,  824,  637,  947,  762,  855,  731,  886,  543,  1008, 606,  978,
+  700,  917,  669,  948,  575,  825,  1009, 794,  856,  763,  887,  638,  979,
+  732,  918,  701,  949,  607,  1010, 670,  980,  826,  857,  795,  888,  764,
+  919,  639,  1011, 733,  950,  702,  981,  858,  827,  889,  796,  920,  671,
+  1012, 765,  951,  734,  982,  703,  1013, 859,  890,  828,  921,  797,  952,
+  766,  983,  735,  1014, 891,  860,  922,  829,  953,  798,  984,  767,  1015,
+  892,  923,  861,  954,  830,  985,  799,  1016, 924,  893,  955,  862,  986,
+  831,  1017, 925,  956,  894,  987,  863,  1018, 957,  926,  988,  895,  1019,
+  958,  989,  927,  1020, 990,  959,  1021, 991,  1022, 1023,
+};
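+
+// Layout note: entries 0..511 above all satisfy index < 512, i.e. they
+// lie in the top half; entries 512..1023 cover the bottom half.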
+
+// Scan where the top-left quarter is scanned first
+DECLARE_ALIGNED(16, static const int16_t, qtr_scan_32x32[1024]) = {
+  0,    1,    32,   33,   2,    64,   34,   65,   66,   3,    96,   35,   97,
+  67,   98,   4,    128,  36,   129,  99,   68,   130,  5,    100,  131,  160,
+  37,   161,  69,   162,  132,  101,  163,  6,    192,  38,   193,  70,   194,
+  133,  164,  102,  195,  7,    224,  39,   165,  225,  134,  196,  71,   226,
+  103,  227,  166,  197,  8,    256,  40,   135,  228,  257,  72,   258,  198,
+  104,  259,  167,  229,  136,  260,  9,    288,  41,   289,  73,   199,  230,
+  290,  168,  261,  105,  291,  137,  292,  231,  10,   200,  262,  320,  42,
+  321,  74,   322,  169,  293,  106,  323,  232,  263,  138,  324,  201,  294,
+  11,   352,  43,   353,  75,   170,  325,  354,  264,  107,  233,  295,  355,
+  202,  326,  139,  356,  12,   384,  44,   265,  296,  385,  171,  357,  76,
+  386,  234,  327,  108,  387,  203,  358,  140,  388,  297,  266,  328,  13,
+  172,  389,  416,  45,   235,  359,  417,  77,   418,  109,  419,  204,  390,
+  298,  329,  141,  267,  360,  420,  236,  391,  173,  421,  14,   448,  46,
+  449,  78,   330,  450,  299,  361,  110,  205,  422,  451,  268,  392,  142,
+  452,  237,  423,  174,  331,  362,  453,  15,   300,  393,  480,  47,   481,
+  79,   482,  206,  454,  269,  424,  111,  483,  143,  484,  363,  332,  394,
+  238,  455,  175,  301,  425,  485,  270,  456,  207,  486,  364,  395,  333,
+  426,  239,  487,  302,  457,  396,  271,  488,  365,  427,  334,  458,  303,
+  489,  397,  428,  366,  459,  335,  490,  429,  398,  460,  367,  491,  430,
+  461,  399,  492,  462,  431,  493,  463,  494,  495,  16,   512,  48,   513,
+  80,   514,  112,  515,  144,  516,  176,  517,  17,   544,  49,   545,  208,
+  518,  81,   546,  113,  547,  145,  240,  519,  548,  177,  549,  272,  520,
+  18,   576,  50,   209,  550,  577,  82,   578,  114,  579,  304,  521,  241,
+  551,  146,  580,  178,  581,  273,  552,  336,  522,  210,  582,  19,   608,
+  51,   609,  83,   610,  115,  305,  553,  611,  242,  583,  147,  368,  523,
+  612,  179,  613,  274,  584,  337,  554,  211,  614,  20,   400,  524,  640,
+  52,   641,  84,   642,  306,  585,  116,  643,  243,  369,  555,  615,  148,
+  644,  338,  586,  180,  275,  432,  525,  616,  645,  401,  556,  212,  646,
+  21,   672,  53,   307,  617,  673,  85,   370,  587,  674,  244,  647,  117,
+  675,  464,  526,  149,  676,  433,  557,  339,  618,  276,  648,  181,  677,
+  402,  588,  213,  678,  308,  496,  527,  649,  371,  619,  22,   704,  54,
+  465,  558,  705,  86,   706,  245,  679,  118,  434,  589,  707,  150,  340,
+  650,  708,  277,  403,  620,  680,  182,  709,  528,  497,  559,  214,  466,
+  590,  710,  372,  651,  309,  681,  23,   736,  55,   435,  621,  737,  87,
+  246,  711,  738,  119,  739,  341,  682,  404,  652,  151,  529,  560,  740,
+  278,  712,  498,  591,  183,  741,  467,  622,  373,  683,  215,  310,  713,
+  742,  436,  653,  24,   768,  56,   769,  247,  561,  743,  88,   530,  592,
+  770,  342,  714,  120,  405,  684,  771,  499,  623,  152,  772,  279,  744,
+  468,  654,  184,  773,  374,  715,  311,  437,  685,  745,  216,  774,  562,
+  593,  531,  624,  25,   248,  500,  655,  775,  800,  57,   801,  406,  716,
+  89,   343,  746,  802,  121,  803,  469,  686,  280,  776,  153,  804,  594,
+  185,  375,  563,  625,  747,  805,  438,  717,  532,  656,  312,  777,  217,
+  806,  501,  687,  407,  748,  249,  807,  26,   344,  778,  832,  58,   833,
+  90,   470,  718,  834,  122,  595,  626,  835,  281,  564,  657,  808,  154,
+  836,  376,  533,  688,  779,  439,  749,  186,  837,  313,  809,  502,  719,
+  218,  838,  408,  780,  627,  596,  658,  250,  345,  471,  750,  810,  839,
+  27,   864,  59,   565,  689,  865,  91,   866,  123,  867,  282,  534,  720,
+  840,  155,  440,  781,  868,  377,  811,  187,  503,  751,  869,  314,  841,
+  628,  659,  219,  597,  690,  870,  409,  812,  472,  782,  566,  721,  346,
+  842,  251,  871,  28,   896,  60,   535,  752,  897,  92,   898,  124,  283,
+  872,  899,  441,  813,  378,  843,  156,  660,  900,  504,  783,  629,  691,
+  598,  722,  188,  901,  315,  873,  567,  753,  220,  410,  844,  902,  473,
+  814,  347,  874,  536,  784,  252,  903,  29,   661,  692,  928,  61,   929,
+  93,   442,  630,  723,  845,  930,  284,  904,  125,  379,  505,  815,  875,
+  931,  599,  754,  157,  932,  316,  568,  785,  905,  189,  933,  474,  846,
+  411,  876,  221,  934,  537,  816,  693,  348,  662,  724,  906,  253,  631,
+  755,  935,  443,  877,  30,   600,  786,  960,  62,   506,  847,  961,  94,
+  962,  285,  380,  907,  936,  126,  963,  569,  817,  158,  964,  317,  937,
+  190,  475,  694,  725,  878,  965,  412,  908,  663,  756,  538,  848,  222,
+  966,  632,  787,  349,  938,  254,  601,  818,  967,  444,  909,  507,  879,
+  31,   992,  63,   381,  939,  993,  286,  968,  95,   570,  849,  994,  726,
+  127,  695,  757,  995,  664,  788,  159,  996,  476,  910,  318,  969,  413,
+  539,  880,  940,  191,  633,  819,  997,  223,  998,  350,  602,  850,  970,
+  508,  911,  255,  445,  941,  999,  727,  758,  696,  789,  571,  881,  382,
+  971,  287,  665,  820,  1000, 477,  634,  851,  942,  540,  912,  319,  1001,
+  414,  972,  603,  882,  759,  728,  790,  351,  1002, 509,  697,  821,  943,
+  446,  973,  572,  913,  666,  852,  383,  1003, 635,  883,  478,  974,  541,
+  944,  415,  760,  791,  1004, 604,  914,  729,  822,  698,  853,  510,  975,
+  667,  884,  447,  573,  945,  1005, 636,  915,  792,  761,  823,  542,  976,
+  479,  1006, 730,  854,  605,  946,  699,  885,  668,  916,  511,  1007, 574,
+  977,  793,  824,  637,  947,  762,  855,  731,  886,  543,  1008, 606,  978,
+  700,  917,  669,  948,  575,  825,  1009, 794,  856,  763,  887,  638,  979,
+  732,  918,  701,  949,  607,  1010, 670,  980,  826,  857,  795,  888,  764,
+  919,  639,  1011, 733,  950,  702,  981,  858,  827,  889,  796,  920,  671,
+  1012, 765,  951,  734,  982,  703,  1013, 859,  890,  828,  921,  797,  952,
+  766,  983,  735,  1014, 891,  860,  922,  829,  953,  798,  984,  767,  1015,
+  892,  923,  861,  954,  830,  985,  799,  1016, 924,  893,  955,  862,  986,
+  831,  1017, 925,  956,  894,  987,  863,  1018, 957,  926,  988,  895,  1019,
+  958,  989,  927,  1020, 990,  959,  1021, 991,  1022, 1023,
+};
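+
+// Layout note: entries 0..255 above stay within the top-left 16x16
+// quarter (row < 16 and column < 16); the remaining three quarters
+// follow.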
+#endif  // CONFIG_EXT_TX
+
+// Neighborhood 2-tuples for the various scans and block sizes,
+// in {top, left} order for each position in the corresponding scan
+// order.
+DECLARE_ALIGNED(16, static const int16_t,
+                default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
+  0, 0, 0, 0, 4,  0, 1, 4, 4, 5,  5,  1, 8,  8,  5,  8, 2,
+  2, 2, 5, 9, 12, 6, 9, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0,
+};
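+
+// Each scan position c owns the pair (2 * c, 2 * c + 1) in these
+// tables: the raster indices of two already-coded neighbors whose
+// token values select the entropy context for position c. The extra
+// trailing { 0, 0 } pair keeps the lookup safe at the end-of-block
+// position. A sketch of the lookup, assuming a token_cache[] of
+// per-coefficient token values as used by the entropy code:
+//
+//   static INLINE int get_coef_context(const int16_t *neighbors,
+//                                      const uint8_t *token_cache, int c) {
+//     return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
+//             token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
+//   }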
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+                mcol_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
+  0, 0, 0, 0, 4, 4,  8,  8, 0, 0, 1, 4, 5,  8,  9,  12, 1,
+  1, 2, 5, 6, 9, 10, 13, 2, 2, 3, 6, 7, 10, 11, 14, 0,  0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mrow_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
+  0, 0, 0, 0, 1, 1, 2,  2, 0, 0, 1,  4,  2,  5,  3,  6, 4,
+  4, 5, 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 0, 0,
+};
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t,
+                col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
+  0,  0, 0, 0, 4, 4, 4, 0, 8, 8,  1,  4, 5,  8,  5,  1, 9,
+  12, 2, 5, 6, 9, 6, 2, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
+  0, 0, 0, 0, 0, 1, 1, 1, 1,  4, 2,  2,  2,  5,  4,  5, 5,
+  8, 3, 6, 8, 9, 6, 9, 9, 12, 7, 10, 10, 13, 11, 14, 0, 0,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+                default_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  0,  0,  1,  4,  1,  1,  4,  4,  2,  5,  5,  8,  6,
+  9,  2,  2,  8,  8,  3,  6,  9,  12, 7,  10, 10, 13, 12, 12, 13, 16,
+  11, 14, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21,
+  24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0,  0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mcol_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
+  0, 0, 0,  0,  4,  4,  8,  8,  12, 12, 16, 16, 20, 20, 24, 24, 0,
+  0, 1, 4,  5,  8,  9,  12, 13, 16, 17, 20, 21, 24, 25, 28, 1,  1,
+  2, 5, 6,  9,  10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 2,  2,  3,
+  6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 0,  0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mrow_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  1,  1,  2,  2,  0,  0,  1,  4,  2,  5,  3,  6,  4,
+  4,  5,  8,  6,  9,  7,  10, 8,  8,  9,  12, 10, 13, 11, 14, 12, 12,
+  13, 16, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21,
+  24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0,  0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                default_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  0,  0, 1,  8,  1,  1,  8,  8,  2,  9,  9, 16, 10,
+  17, 2,  2,  16, 16, 3, 10, 17, 24, 11, 18, 18, 25, 3,  3, 4,  11,
+  19, 26, 12, 19, 4,  4, 20, 27, 5,  12, 13, 20, 21, 28, 5, 5,  6,
+  13, 14, 21, 22, 29, 6, 6,  7,  14, 15, 22, 23, 30, 0,  0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mcol_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  8,  8,  16, 16, 0,  0,  1,  8,  9,  16, 17, 24, 1,
+  1,  2,  9,  10, 17, 18, 25, 2,  2,  3,  10, 11, 18, 19, 26, 3,  3,
+  4,  11, 12, 19, 20, 27, 4,  4,  5,  12, 13, 20, 21, 28, 5,  5,  6,
+  13, 14, 21, 22, 29, 6,  6,  7,  14, 15, 22, 23, 30, 0,  0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mrow_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  0,
+  0,  1,  8,  2,  9,  3,  10, 4,  11, 5,  12, 6,  13, 7,  14, 8,  8,
+  9,  16, 10, 17, 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 16, 17,
+  24, 18, 25, 19, 26, 20, 27, 21, 28, 22, 29, 23, 30, 0,  0
+};
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t,
+                col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  8,  8,  8,  0,  16, 16, 1,  8,  24, 24, 9,  16, 9,  1,  32,
+  32, 17, 24, 2,  9,  25, 32, 10, 17, 40, 40, 10, 2,  18, 25, 33, 40, 3,  10,
+  48, 48, 11, 18, 26, 33, 11, 3,  41, 48, 19, 26, 34, 41, 4,  11, 27, 34, 12,
+  19, 49, 56, 42, 49, 20, 27, 12, 4,  35, 42, 5,  12, 28, 35, 50, 57, 43, 50,
+  13, 20, 36, 43, 13, 5,  21, 28, 51, 58, 29, 36, 6,  13, 44, 51, 14, 21, 14,
+  6,  37, 44, 52, 59, 22, 29, 7,  14, 30, 37, 45, 52, 15, 22, 38, 45, 23, 30,
+  53, 60, 31, 38, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0,  0,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+                mcol_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  8,  8,  16, 16, 24, 24, 32, 32, 40, 40, 48, 48, 0,  0,  1,
+  8,  9,  16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56, 1,  1,  2,  9,  10, 17,
+  18, 25, 26, 33, 34, 41, 42, 49, 50, 57, 2,  2,  3,  10, 11, 18, 19, 26, 27,
+  34, 35, 42, 43, 50, 51, 58, 3,  3,  4,  11, 12, 19, 20, 27, 28, 35, 36, 43,
+  44, 51, 52, 59, 4,  4,  5,  12, 13, 20, 21, 28, 29, 36, 37, 44, 45, 52, 53,
+  60, 5,  5,  6,  13, 14, 21, 22, 29, 30, 37, 38, 45, 46, 53, 54, 61, 6,  6,
+  7,  14, 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62, 0,  0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mrow_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  0,  0,  1,
+  8,  2,  9,  3,  10, 4,  11, 5,  12, 6,  13, 7,  14, 8,  8,  9,  16, 10, 17,
+  11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 16, 17, 24, 18, 25, 19, 26, 20,
+  27, 21, 28, 22, 29, 23, 30, 24, 24, 25, 32, 26, 33, 27, 34, 28, 35, 29, 36,
+  30, 37, 31, 38, 32, 32, 33, 40, 34, 41, 35, 42, 36, 43, 37, 44, 38, 45, 39,
+  46, 40, 40, 41, 48, 42, 49, 43, 50, 44, 51, 45, 52, 46, 53, 47, 54, 48, 48,
+  49, 56, 50, 57, 51, 58, 52, 59, 53, 60, 54, 61, 55, 62, 0,  0,
+};
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t,
+                row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  1,  1,  0,  1,  1,  8,  2,  2,  8,  9,  2,  9,  3,  3,  9,
+  16, 3,  10, 16, 17, 4,  4,  10, 17, 17, 24, 4,  11, 11, 18, 18, 25, 24, 25,
+  5,  5,  5,  12, 12, 19, 25, 32, 19, 26, 6,  6,  26, 33, 32, 33, 13, 20, 20,
+  27, 33, 40, 6,  13, 27, 34, 40, 41, 34, 41, 21, 28, 28, 35, 41, 48, 14, 21,
+  35, 42, 7,  14, 48, 49, 29, 36, 42, 49, 36, 43, 22, 29, 49, 56, 15, 22, 43,
+  50, 50, 57, 37, 44, 30, 37, 44, 51, 23, 30, 51, 58, 45, 52, 38, 45, 52, 59,
+  31, 38, 53, 60, 39, 46, 46, 53, 47, 54, 54, 61, 55, 62, 0,  0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  8,  0,  8,  8,  1,  8,  9,  1,  9,  16, 16, 17, 2,  9,  10,
+  2,  10, 17, 17, 24, 24, 25, 3,  10, 11, 3,  18, 25, 25, 32, 11, 18, 32, 33,
+  4,  11, 26, 33, 19, 26, 12, 4,  33, 40, 12, 19, 40, 41, 5,  12, 27, 34, 34,
+  41, 20, 27, 13, 20, 13, 5,  41, 48, 48, 49, 28, 35, 35, 42, 21, 28, 6,  6,
+  6,  13, 42, 49, 49, 56, 36, 43, 14, 21, 29, 36, 7,  14, 43, 50, 50, 57, 22,
+  29, 37, 44, 15, 22, 44, 51, 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45,
+  31, 38, 53, 60, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0,  0,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+                default_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   0,   0,   1,   1,   1,   8,   8,   8,   2,   2,   2,
+  9,   9,   16,  16,  16,  3,   3,   3,   10,  10,  17,  17,  24,  24,  24,
+  4,   4,   4,   11,  11,  18,  18,  25,  25,  32,  32,  32,  5,   5,   5,
+  12,  12,  19,  19,  26,  26,  33,  33,  40,  40,  40,  6,   6,   6,   13,
+  13,  20,  20,  27,  27,  34,  34,  41,  41,  48,  48,  48,  7,   14,  14,
+  21,  21,  28,  28,  35,  35,  42,  42,  49,  49,  56,  56,  56,  15,  22,
+  22,  29,  29,  36,  36,  43,  43,  50,  50,  57,  57,  64,  64,  64,  23,
+  30,  30,  37,  37,  44,  44,  51,  51,  58,  58,  65,  65,  72,  72,  72,
+  31,  38,  38,  45,  45,  52,  52,  59,  59,  66,  66,  73,  73,  80,  80,
+  80,  39,  46,  46,  53,  53,  60,  60,  67,  67,  74,  74,  81,  81,  88,
+  88,  88,  47,  54,  54,  61,  61,  68,  68,  75,  75,  82,  82,  89,  89,
+  96,  96,  96,  55,  62,  62,  69,  69,  76,  76,  83,  83,  90,  90,  97,
+  97,  104, 104, 104, 63,  70,  70,  77,  77,  84,  84,  91,  91,  98,  98,
+  105, 105, 112, 112, 112, 71,  78,  78,  85,  85,  92,  92,  99,  99,  106,
+  106, 113, 113, 120, 79,  86,  86,  93,  93,  100, 100, 107, 107, 114, 114,
+  121, 87,  94,  94,  101, 101, 108, 108, 115, 115, 122, 95,  102, 102, 109,
+  109, 116, 116, 123, 103, 110, 110, 117, 117, 124, 111, 118, 118, 125, 119,
+  126, 0,   0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                default_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   0,   0,  1,  1,   1,   16,  16,  16,  2,   2,   2,
+  17,  17,  32,  32,  32,  3,  3,  3,   18,  18,  33,  33,  48,  48,  48,
+  4,   4,   4,   19,  19,  34, 34, 49,  49,  64,  64,  64,  5,   5,   5,
+  20,  20,  35,  35,  50,  50, 65, 65,  80,  80,  80,  6,   6,   6,   21,
+  21,  36,  36,  51,  51,  66, 66, 81,  81,  96,  96,  96,  7,   7,   7,
+  22,  22,  37,  37,  52,  52, 67, 67,  82,  82,  97,  97,  112, 8,   8,
+  8,   23,  23,  38,  38,  53, 53, 68,  68,  83,  83,  98,  98,  113, 9,
+  9,   9,   24,  24,  39,  39, 54, 54,  69,  69,  84,  84,  99,  99,  114,
+  10,  10,  10,  25,  25,  40, 40, 55,  55,  70,  70,  85,  85,  100, 100,
+  115, 11,  11,  11,  26,  26, 41, 41,  56,  56,  71,  71,  86,  86,  101,
+  101, 116, 12,  12,  12,  27, 27, 42,  42,  57,  57,  72,  72,  87,  87,
+  102, 102, 117, 13,  13,  13, 28, 28,  43,  43,  58,  58,  73,  73,  88,
+  88,  103, 103, 118, 14,  14, 14, 29,  29,  44,  44,  59,  59,  74,  74,
+  89,  89,  104, 104, 119, 15, 30, 30,  45,  45,  60,  60,  75,  75,  90,
+  90,  105, 105, 120, 31,  46, 46, 61,  61,  76,  76,  91,  91,  106, 106,
+  121, 47,  62,  62,  77,  77, 92, 92,  107, 107, 122, 63,  78,  78,  93,
+  93,  108, 108, 123, 79,  94, 94, 109, 109, 124, 95,  110, 110, 125, 111,
+  126, 0,   0
+};
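+
+// For the rectangular block sizes, raster indices in these tables use
+// the block's own row stride: 8 for the 8x16 tables, 16 for the 16x8
+// tables just above.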
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mcol_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  8,  8,  16, 16, 24, 24,  32,  32,  40,  40,  48,  48,
+  56, 56, 64, 64, 72, 72, 80, 80, 88, 88,  96,  96,  104, 104, 112, 112,
+  0,  0,  1,  8,  9,  16, 17, 24, 25, 32,  33,  40,  41,  48,  49,  56,
+  57, 64, 65, 72, 73, 80, 81, 88, 89, 96,  97,  104, 105, 112, 113, 120,
+  1,  1,  2,  9,  10, 17, 18, 25, 26, 33,  34,  41,  42,  49,  50,  57,
+  58, 65, 66, 73, 74, 81, 82, 89, 90, 97,  98,  105, 106, 113, 114, 121,
+  2,  2,  3,  10, 11, 18, 19, 26, 27, 34,  35,  42,  43,  50,  51,  58,
+  59, 66, 67, 74, 75, 82, 83, 90, 91, 98,  99,  106, 107, 114, 115, 122,
+  3,  3,  4,  11, 12, 19, 20, 27, 28, 35,  36,  43,  44,  51,  52,  59,
+  60, 67, 68, 75, 76, 83, 84, 91, 92, 99,  100, 107, 108, 115, 116, 123,
+  4,  4,  5,  12, 13, 20, 21, 28, 29, 36,  37,  44,  45,  52,  53,  60,
+  61, 68, 69, 76, 77, 84, 85, 92, 93, 100, 101, 108, 109, 116, 117, 124,
+  5,  5,  6,  13, 14, 21, 22, 29, 30, 37,  38,  45,  46,  53,  54,  61,
+  62, 69, 70, 77, 78, 85, 86, 93, 94, 101, 102, 109, 110, 117, 118, 125,
+  6,  6,  7,  14, 15, 22, 23, 30, 31, 38,  39,  46,  47,  54,  55,  62,
+  63, 70, 71, 78, 79, 86, 87, 94, 95, 102, 103, 110, 111, 118, 119, 126,
+  0,  0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mcol_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = {
+  0,  0,  0,  0,  16, 16, 32, 32, 48, 48, 64, 64, 80, 80,  96,  96,
+  0,  0,  1,  16, 17, 32, 33, 48, 49, 64, 65, 80, 81, 96,  97,  112,
+  1,  1,  2,  17, 18, 33, 34, 49, 50, 65, 66, 81, 82, 97,  98,  113,
+  2,  2,  3,  18, 19, 34, 35, 50, 51, 66, 67, 82, 83, 98,  99,  114,
+  3,  3,  4,  19, 20, 35, 36, 51, 52, 67, 68, 83, 84, 99,  100, 115,
+  4,  4,  5,  20, 21, 36, 37, 52, 53, 68, 69, 84, 85, 100, 101, 116,
+  5,  5,  6,  21, 22, 37, 38, 53, 54, 69, 70, 85, 86, 101, 102, 117,
+  6,  6,  7,  22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118,
+  7,  7,  8,  23, 24, 39, 40, 55, 56, 71, 72, 87, 88, 103, 104, 119,
+  8,  8,  9,  24, 25, 40, 41, 56, 57, 72, 73, 88, 89, 104, 105, 120,
+  9,  9,  10, 25, 26, 41, 42, 57, 58, 73, 74, 89, 90, 105, 106, 121,
+  10, 10, 11, 26, 27, 42, 43, 58, 59, 74, 75, 90, 91, 106, 107, 122,
+  11, 11, 12, 27, 28, 43, 44, 59, 60, 75, 76, 91, 92, 107, 108, 123,
+  12, 12, 13, 28, 29, 44, 45, 60, 61, 76, 77, 92, 93, 108, 109, 124,
+  13, 13, 14, 29, 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125,
+  14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111, 126,
+  0,  0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mrow_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   1,   1,   2,   2,   3,   3,   4,   4,   5,   5,   6,
+  6,   0,   0,   1,   8,   2,   9,   3,   10,  4,   11,  5,   12,  6,   13,
+  7,   14,  8,   8,   9,   16,  10,  17,  11,  18,  12,  19,  13,  20,  14,
+  21,  15,  22,  16,  16,  17,  24,  18,  25,  19,  26,  20,  27,  21,  28,
+  22,  29,  23,  30,  24,  24,  25,  32,  26,  33,  27,  34,  28,  35,  29,
+  36,  30,  37,  31,  38,  32,  32,  33,  40,  34,  41,  35,  42,  36,  43,
+  37,  44,  38,  45,  39,  46,  40,  40,  41,  48,  42,  49,  43,  50,  44,
+  51,  45,  52,  46,  53,  47,  54,  48,  48,  49,  56,  50,  57,  51,  58,
+  52,  59,  53,  60,  54,  61,  55,  62,  56,  56,  57,  64,  58,  65,  59,
+  66,  60,  67,  61,  68,  62,  69,  63,  70,  64,  64,  65,  72,  66,  73,
+  67,  74,  68,  75,  69,  76,  70,  77,  71,  78,  72,  72,  73,  80,  74,
+  81,  75,  82,  76,  83,  77,  84,  78,  85,  79,  86,  80,  80,  81,  88,
+  82,  89,  83,  90,  84,  91,  85,  92,  86,  93,  87,  94,  88,  88,  89,
+  96,  90,  97,  91,  98,  92,  99,  93,  100, 94,  101, 95,  102, 96,  96,
+  97,  104, 98,  105, 99,  106, 100, 107, 101, 108, 102, 109, 103, 110, 104,
+  104, 105, 112, 106, 113, 107, 114, 108, 115, 109, 116, 110, 117, 111, 118,
+  112, 112, 113, 120, 114, 121, 115, 122, 116, 123, 117, 124, 118, 125, 119,
+  126, 0,   0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mrow_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   1,   1,   2,   2,   3,   3,   4,   4,   5,   5,   6,
+  6,   7,   7,   8,   8,   9,   9,   10,  10,  11,  11,  12,  12,  13,  13,
+  14,  14,  0,   0,   1,   16,  2,   17,  3,   18,  4,   19,  5,   20,  6,
+  21,  7,   22,  8,   23,  9,   24,  10,  25,  11,  26,  12,  27,  13,  28,
+  14,  29,  15,  30,  16,  16,  17,  32,  18,  33,  19,  34,  20,  35,  21,
+  36,  22,  37,  23,  38,  24,  39,  25,  40,  26,  41,  27,  42,  28,  43,
+  29,  44,  30,  45,  31,  46,  32,  32,  33,  48,  34,  49,  35,  50,  36,
+  51,  37,  52,  38,  53,  39,  54,  40,  55,  41,  56,  42,  57,  43,  58,
+  44,  59,  45,  60,  46,  61,  47,  62,  48,  48,  49,  64,  50,  65,  51,
+  66,  52,  67,  53,  68,  54,  69,  55,  70,  56,  71,  57,  72,  58,  73,
+  59,  74,  60,  75,  61,  76,  62,  77,  63,  78,  64,  64,  65,  80,  66,
+  81,  67,  82,  68,  83,  69,  84,  70,  85,  71,  86,  72,  87,  73,  88,
+  74,  89,  75,  90,  76,  91,  77,  92,  78,  93,  79,  94,  80,  80,  81,
+  96,  82,  97,  83,  98,  84,  99,  85,  100, 86,  101, 87,  102, 88,  103,
+  89,  104, 90,  105, 91,  106, 92,  107, 93,  108, 94,  109, 95,  110, 96,
+  96,  97,  112, 98,  113, 99,  114, 100, 115, 101, 116, 102, 117, 103, 118,
+  104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111,
+  126, 0,   0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                default_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   0,   0,   1,   1,   1,   16,  16,  16,  2,   2,   2,
+  17,  17,  32,  32,  32,  3,   3,   3,   18,  18,  33,  33,  48,  48,  48,
+  4,   4,   4,   19,  19,  34,  34,  49,  49,  64,  64,  64,  5,   5,   5,
+  20,  20,  35,  35,  50,  50,  65,  65,  80,  80,  80,  6,   6,   6,   21,
+  21,  36,  36,  51,  51,  66,  66,  81,  81,  96,  96,  96,  7,   7,   7,
+  22,  22,  37,  37,  52,  52,  67,  67,  82,  82,  97,  97,  112, 112, 112,
+  8,   8,   8,   23,  23,  38,  38,  53,  53,  68,  68,  83,  83,  98,  98,
+  113, 113, 128, 128, 128, 9,   9,   9,   24,  24,  39,  39,  54,  54,  69,
+  69,  84,  84,  99,  99,  114, 114, 129, 129, 144, 144, 144, 10,  10,  10,
+  25,  25,  40,  40,  55,  55,  70,  70,  85,  85,  100, 100, 115, 115, 130,
+  130, 145, 145, 160, 160, 160, 11,  11,  11,  26,  26,  41,  41,  56,  56,
+  71,  71,  86,  86,  101, 101, 116, 116, 131, 131, 146, 146, 161, 161, 176,
+  176, 176, 12,  12,  12,  27,  27,  42,  42,  57,  57,  72,  72,  87,  87,
+  102, 102, 117, 117, 132, 132, 147, 147, 162, 162, 177, 177, 192, 192, 192,
+  13,  13,  13,  28,  28,  43,  43,  58,  58,  73,  73,  88,  88,  103, 103,
+  118, 118, 133, 133, 148, 148, 163, 163, 178, 178, 193, 193, 208, 208, 208,
+  14,  14,  14,  29,  29,  44,  44,  59,  59,  74,  74,  89,  89,  104, 104,
+  119, 119, 134, 134, 149, 149, 164, 164, 179, 179, 194, 194, 209, 209, 224,
+  224, 224, 15,  30,  30,  45,  45,  60,  60,  75,  75,  90,  90,  105, 105,
+  120, 120, 135, 135, 150, 150, 165, 165, 180, 180, 195, 195, 210, 210, 225,
+  225, 240, 240, 240, 31,  46,  46,  61,  61,  76,  76,  91,  91,  106, 106,
+  121, 121, 136, 136, 151, 151, 166, 166, 181, 181, 196, 196, 211, 211, 226,
+  226, 241, 241, 256, 256, 256, 47,  62,  62,  77,  77,  92,  92,  107, 107,
+  122, 122, 137, 137, 152, 152, 167, 167, 182, 182, 197, 197, 212, 212, 227,
+  227, 242, 242, 257, 257, 272, 272, 272, 63,  78,  78,  93,  93,  108, 108,
+  123, 123, 138, 138, 153, 153, 168, 168, 183, 183, 198, 198, 213, 213, 228,
+  228, 243, 243, 258, 258, 273, 273, 288, 288, 288, 79,  94,  94,  109, 109,
+  124, 124, 139, 139, 154, 154, 169, 169, 184, 184, 199, 199, 214, 214, 229,
+  229, 244, 244, 259, 259, 274, 274, 289, 289, 304, 304, 304, 95,  110, 110,
+  125, 125, 140, 140, 155, 155, 170, 170, 185, 185, 200, 200, 215, 215, 230,
+  230, 245, 245, 260, 260, 275, 275, 290, 290, 305, 305, 320, 320, 320, 111,
+  126, 126, 141, 141, 156, 156, 171, 171, 186, 186, 201, 201, 216, 216, 231,
+  231, 246, 246, 261, 261, 276, 276, 291, 291, 306, 306, 321, 321, 336, 336,
+  336, 127, 142, 142, 157, 157, 172, 172, 187, 187, 202, 202, 217, 217, 232,
+  232, 247, 247, 262, 262, 277, 277, 292, 292, 307, 307, 322, 322, 337, 337,
+  352, 352, 352, 143, 158, 158, 173, 173, 188, 188, 203, 203, 218, 218, 233,
+  233, 248, 248, 263, 263, 278, 278, 293, 293, 308, 308, 323, 323, 338, 338,
+  353, 353, 368, 368, 368, 159, 174, 174, 189, 189, 204, 204, 219, 219, 234,
+  234, 249, 249, 264, 264, 279, 279, 294, 294, 309, 309, 324, 324, 339, 339,
+  354, 354, 369, 369, 384, 384, 384, 175, 190, 190, 205, 205, 220, 220, 235,
+  235, 250, 250, 265, 265, 280, 280, 295, 295, 310, 310, 325, 325, 340, 340,
+  355, 355, 370, 370, 385, 385, 400, 400, 400, 191, 206, 206, 221, 221, 236,
+  236, 251, 251, 266, 266, 281, 281, 296, 296, 311, 311, 326, 326, 341, 341,
+  356, 356, 371, 371, 386, 386, 401, 401, 416, 416, 416, 207, 222, 222, 237,
+  237, 252, 252, 267, 267, 282, 282, 297, 297, 312, 312, 327, 327, 342, 342,
+  357, 357, 372, 372, 387, 387, 402, 402, 417, 417, 432, 432, 432, 223, 238,
+  238, 253, 253, 268, 268, 283, 283, 298, 298, 313, 313, 328, 328, 343, 343,
+  358, 358, 373, 373, 388, 388, 403, 403, 418, 418, 433, 433, 448, 448, 448,
+  239, 254, 254, 269, 269, 284, 284, 299, 299, 314, 314, 329, 329, 344, 344,
+  359, 359, 374, 374, 389, 389, 404, 404, 419, 419, 434, 434, 449, 449, 464,
+  464, 464, 255, 270, 270, 285, 285, 300, 300, 315, 315, 330, 330, 345, 345,
+  360, 360, 375, 375, 390, 390, 405, 405, 420, 420, 435, 435, 450, 450, 465,
+  465, 480, 480, 480, 271, 286, 286, 301, 301, 316, 316, 331, 331, 346, 346,
+  361, 361, 376, 376, 391, 391, 406, 406, 421, 421, 436, 436, 451, 451, 466,
+  466, 481, 481, 496, 287, 302, 302, 317, 317, 332, 332, 347, 347, 362, 362,
+  377, 377, 392, 392, 407, 407, 422, 422, 437, 437, 452, 452, 467, 467, 482,
+  482, 497, 303, 318, 318, 333, 333, 348, 348, 363, 363, 378, 378, 393, 393,
+  408, 408, 423, 423, 438, 438, 453, 453, 468, 468, 483, 483, 498, 319, 334,
+  334, 349, 349, 364, 364, 379, 379, 394, 394, 409, 409, 424, 424, 439, 439,
+  454, 454, 469, 469, 484, 484, 499, 335, 350, 350, 365, 365, 380, 380, 395,
+  395, 410, 410, 425, 425, 440, 440, 455, 455, 470, 470, 485, 485, 500, 351,
+  366, 366, 381, 381, 396, 396, 411, 411, 426, 426, 441, 441, 456, 456, 471,
+  471, 486, 486, 501, 367, 382, 382, 397, 397, 412, 412, 427, 427, 442, 442,
+  457, 457, 472, 472, 487, 487, 502, 383, 398, 398, 413, 413, 428, 428, 443,
+  443, 458, 458, 473, 473, 488, 488, 503, 399, 414, 414, 429, 429, 444, 444,
+  459, 459, 474, 474, 489, 489, 504, 415, 430, 430, 445, 445, 460, 460, 475,
+  475, 490, 490, 505, 431, 446, 446, 461, 461, 476, 476, 491, 491, 506, 447,
+  462, 462, 477, 477, 492, 492, 507, 463, 478, 478, 493, 493, 508, 479, 494,
+  494, 509, 495, 510, 0,   0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                default_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   0,   0,   1,   1,   1,   32,  32,  32,  2,   2,   2,
+  33,  33,  64,  64,  64,  3,   3,   3,   34,  34,  65,  65,  96,  96,  96,
+  4,   4,   4,   35,  35,  66,  66,  97,  97,  128, 128, 128, 5,   5,   5,
+  36,  36,  67,  67,  98,  98,  129, 129, 160, 160, 160, 6,   6,   6,   37,
+  37,  68,  68,  99,  99,  130, 130, 161, 161, 192, 192, 192, 7,   7,   7,
+  38,  38,  69,  69,  100, 100, 131, 131, 162, 162, 193, 193, 224, 224, 224,
+  8,   8,   8,   39,  39,  70,  70,  101, 101, 132, 132, 163, 163, 194, 194,
+  225, 225, 256, 256, 256, 9,   9,   9,   40,  40,  71,  71,  102, 102, 133,
+  133, 164, 164, 195, 195, 226, 226, 257, 257, 288, 288, 288, 10,  10,  10,
+  41,  41,  72,  72,  103, 103, 134, 134, 165, 165, 196, 196, 227, 227, 258,
+  258, 289, 289, 320, 320, 320, 11,  11,  11,  42,  42,  73,  73,  104, 104,
+  135, 135, 166, 166, 197, 197, 228, 228, 259, 259, 290, 290, 321, 321, 352,
+  352, 352, 12,  12,  12,  43,  43,  74,  74,  105, 105, 136, 136, 167, 167,
+  198, 198, 229, 229, 260, 260, 291, 291, 322, 322, 353, 353, 384, 384, 384,
+  13,  13,  13,  44,  44,  75,  75,  106, 106, 137, 137, 168, 168, 199, 199,
+  230, 230, 261, 261, 292, 292, 323, 323, 354, 354, 385, 385, 416, 416, 416,
+  14,  14,  14,  45,  45,  76,  76,  107, 107, 138, 138, 169, 169, 200, 200,
+  231, 231, 262, 262, 293, 293, 324, 324, 355, 355, 386, 386, 417, 417, 448,
+  448, 448, 15,  15,  15,  46,  46,  77,  77,  108, 108, 139, 139, 170, 170,
+  201, 201, 232, 232, 263, 263, 294, 294, 325, 325, 356, 356, 387, 387, 418,
+  418, 449, 449, 480, 16,  16,  16,  47,  47,  78,  78,  109, 109, 140, 140,
+  171, 171, 202, 202, 233, 233, 264, 264, 295, 295, 326, 326, 357, 357, 388,
+  388, 419, 419, 450, 450, 481, 17,  17,  17,  48,  48,  79,  79,  110, 110,
+  141, 141, 172, 172, 203, 203, 234, 234, 265, 265, 296, 296, 327, 327, 358,
+  358, 389, 389, 420, 420, 451, 451, 482, 18,  18,  18,  49,  49,  80,  80,
+  111, 111, 142, 142, 173, 173, 204, 204, 235, 235, 266, 266, 297, 297, 328,
+  328, 359, 359, 390, 390, 421, 421, 452, 452, 483, 19,  19,  19,  50,  50,
+  81,  81,  112, 112, 143, 143, 174, 174, 205, 205, 236, 236, 267, 267, 298,
+  298, 329, 329, 360, 360, 391, 391, 422, 422, 453, 453, 484, 20,  20,  20,
+  51,  51,  82,  82,  113, 113, 144, 144, 175, 175, 206, 206, 237, 237, 268,
+  268, 299, 299, 330, 330, 361, 361, 392, 392, 423, 423, 454, 454, 485, 21,
+  21,  21,  52,  52,  83,  83,  114, 114, 145, 145, 176, 176, 207, 207, 238,
+  238, 269, 269, 300, 300, 331, 331, 362, 362, 393, 393, 424, 424, 455, 455,
+  486, 22,  22,  22,  53,  53,  84,  84,  115, 115, 146, 146, 177, 177, 208,
+  208, 239, 239, 270, 270, 301, 301, 332, 332, 363, 363, 394, 394, 425, 425,
+  456, 456, 487, 23,  23,  23,  54,  54,  85,  85,  116, 116, 147, 147, 178,
+  178, 209, 209, 240, 240, 271, 271, 302, 302, 333, 333, 364, 364, 395, 395,
+  426, 426, 457, 457, 488, 24,  24,  24,  55,  55,  86,  86,  117, 117, 148,
+  148, 179, 179, 210, 210, 241, 241, 272, 272, 303, 303, 334, 334, 365, 365,
+  396, 396, 427, 427, 458, 458, 489, 25,  25,  25,  56,  56,  87,  87,  118,
+  118, 149, 149, 180, 180, 211, 211, 242, 242, 273, 273, 304, 304, 335, 335,
+  366, 366, 397, 397, 428, 428, 459, 459, 490, 26,  26,  26,  57,  57,  88,
+  88,  119, 119, 150, 150, 181, 181, 212, 212, 243, 243, 274, 274, 305, 305,
+  336, 336, 367, 367, 398, 398, 429, 429, 460, 460, 491, 27,  27,  27,  58,
+  58,  89,  89,  120, 120, 151, 151, 182, 182, 213, 213, 244, 244, 275, 275,
+  306, 306, 337, 337, 368, 368, 399, 399, 430, 430, 461, 461, 492, 28,  28,
+  28,  59,  59,  90,  90,  121, 121, 152, 152, 183, 183, 214, 214, 245, 245,
+  276, 276, 307, 307, 338, 338, 369, 369, 400, 400, 431, 431, 462, 462, 493,
+  29,  29,  29,  60,  60,  91,  91,  122, 122, 153, 153, 184, 184, 215, 215,
+  246, 246, 277, 277, 308, 308, 339, 339, 370, 370, 401, 401, 432, 432, 463,
+  463, 494, 30,  30,  30,  61,  61,  92,  92,  123, 123, 154, 154, 185, 185,
+  216, 216, 247, 247, 278, 278, 309, 309, 340, 340, 371, 371, 402, 402, 433,
+  433, 464, 464, 495, 31,  62,  62,  93,  93,  124, 124, 155, 155, 186, 186,
+  217, 217, 248, 248, 279, 279, 310, 310, 341, 341, 372, 372, 403, 403, 434,
+  434, 465, 465, 496, 63,  94,  94,  125, 125, 156, 156, 187, 187, 218, 218,
+  249, 249, 280, 280, 311, 311, 342, 342, 373, 373, 404, 404, 435, 435, 466,
+  466, 497, 95,  126, 126, 157, 157, 188, 188, 219, 219, 250, 250, 281, 281,
+  312, 312, 343, 343, 374, 374, 405, 405, 436, 436, 467, 467, 498, 127, 158,
+  158, 189, 189, 220, 220, 251, 251, 282, 282, 313, 313, 344, 344, 375, 375,
+  406, 406, 437, 437, 468, 468, 499, 159, 190, 190, 221, 221, 252, 252, 283,
+  283, 314, 314, 345, 345, 376, 376, 407, 407, 438, 438, 469, 469, 500, 191,
+  222, 222, 253, 253, 284, 284, 315, 315, 346, 346, 377, 377, 408, 408, 439,
+  439, 470, 470, 501, 223, 254, 254, 285, 285, 316, 316, 347, 347, 378, 378,
+  409, 409, 440, 440, 471, 471, 502, 255, 286, 286, 317, 317, 348, 348, 379,
+  379, 410, 410, 441, 441, 472, 472, 503, 287, 318, 318, 349, 349, 380, 380,
+  411, 411, 442, 442, 473, 473, 504, 319, 350, 350, 381, 381, 412, 412, 443,
+  443, 474, 474, 505, 351, 382, 382, 413, 413, 444, 444, 475, 475, 506, 383,
+  414, 414, 445, 445, 476, 476, 507, 415, 446, 446, 477, 477, 508, 447, 478,
+  478, 509, 479, 510, 0,   0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mcol_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   16,  16,  32,  32,  48,  48,  64,  64,  80,  80,  96,
+  96,  112, 112, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 208, 208,
+  224, 224, 240, 240, 256, 256, 272, 272, 288, 288, 304, 304, 320, 320, 336,
+  336, 352, 352, 368, 368, 384, 384, 400, 400, 416, 416, 432, 432, 448, 448,
+  464, 464, 480, 480, 0,   0,   1,   16,  17,  32,  33,  48,  49,  64,  65,
+  80,  81,  96,  97,  112, 113, 128, 129, 144, 145, 160, 161, 176, 177, 192,
+  193, 208, 209, 224, 225, 240, 241, 256, 257, 272, 273, 288, 289, 304, 305,
+  320, 321, 336, 337, 352, 353, 368, 369, 384, 385, 400, 401, 416, 417, 432,
+  433, 448, 449, 464, 465, 480, 481, 496, 1,   1,   2,   17,  18,  33,  34,
+  49,  50,  65,  66,  81,  82,  97,  98,  113, 114, 129, 130, 145, 146, 161,
+  162, 177, 178, 193, 194, 209, 210, 225, 226, 241, 242, 257, 258, 273, 274,
+  289, 290, 305, 306, 321, 322, 337, 338, 353, 354, 369, 370, 385, 386, 401,
+  402, 417, 418, 433, 434, 449, 450, 465, 466, 481, 482, 497, 2,   2,   3,
+  18,  19,  34,  35,  50,  51,  66,  67,  82,  83,  98,  99,  114, 115, 130,
+  131, 146, 147, 162, 163, 178, 179, 194, 195, 210, 211, 226, 227, 242, 243,
+  258, 259, 274, 275, 290, 291, 306, 307, 322, 323, 338, 339, 354, 355, 370,
+  371, 386, 387, 402, 403, 418, 419, 434, 435, 450, 451, 466, 467, 482, 483,
+  498, 3,   3,   4,   19,  20,  35,  36,  51,  52,  67,  68,  83,  84,  99,
+  100, 115, 116, 131, 132, 147, 148, 163, 164, 179, 180, 195, 196, 211, 212,
+  227, 228, 243, 244, 259, 260, 275, 276, 291, 292, 307, 308, 323, 324, 339,
+  340, 355, 356, 371, 372, 387, 388, 403, 404, 419, 420, 435, 436, 451, 452,
+  467, 468, 483, 484, 499, 4,   4,   5,   20,  21,  36,  37,  52,  53,  68,
+  69,  84,  85,  100, 101, 116, 117, 132, 133, 148, 149, 164, 165, 180, 181,
+  196, 197, 212, 213, 228, 229, 244, 245, 260, 261, 276, 277, 292, 293, 308,
+  309, 324, 325, 340, 341, 356, 357, 372, 373, 388, 389, 404, 405, 420, 421,
+  436, 437, 452, 453, 468, 469, 484, 485, 500, 5,   5,   6,   21,  22,  37,
+  38,  53,  54,  69,  70,  85,  86,  101, 102, 117, 118, 133, 134, 149, 150,
+  165, 166, 181, 182, 197, 198, 213, 214, 229, 230, 245, 246, 261, 262, 277,
+  278, 293, 294, 309, 310, 325, 326, 341, 342, 357, 358, 373, 374, 389, 390,
+  405, 406, 421, 422, 437, 438, 453, 454, 469, 470, 485, 486, 501, 6,   6,
+  7,   22,  23,  38,  39,  54,  55,  70,  71,  86,  87,  102, 103, 118, 119,
+  134, 135, 150, 151, 166, 167, 182, 183, 198, 199, 214, 215, 230, 231, 246,
+  247, 262, 263, 278, 279, 294, 295, 310, 311, 326, 327, 342, 343, 358, 359,
+  374, 375, 390, 391, 406, 407, 422, 423, 438, 439, 454, 455, 470, 471, 486,
+  487, 502, 7,   7,   8,   23,  24,  39,  40,  55,  56,  71,  72,  87,  88,
+  103, 104, 119, 120, 135, 136, 151, 152, 167, 168, 183, 184, 199, 200, 215,
+  216, 231, 232, 247, 248, 263, 264, 279, 280, 295, 296, 311, 312, 327, 328,
+  343, 344, 359, 360, 375, 376, 391, 392, 407, 408, 423, 424, 439, 440, 455,
+  456, 471, 472, 487, 488, 503, 8,   8,   9,   24,  25,  40,  41,  56,  57,
+  72,  73,  88,  89,  104, 105, 120, 121, 136, 137, 152, 153, 168, 169, 184,
+  185, 200, 201, 216, 217, 232, 233, 248, 249, 264, 265, 280, 281, 296, 297,
+  312, 313, 328, 329, 344, 345, 360, 361, 376, 377, 392, 393, 408, 409, 424,
+  425, 440, 441, 456, 457, 472, 473, 488, 489, 504, 9,   9,   10,  25,  26,
+  41,  42,  57,  58,  73,  74,  89,  90,  105, 106, 121, 122, 137, 138, 153,
+  154, 169, 170, 185, 186, 201, 202, 217, 218, 233, 234, 249, 250, 265, 266,
+  281, 282, 297, 298, 313, 314, 329, 330, 345, 346, 361, 362, 377, 378, 393,
+  394, 409, 410, 425, 426, 441, 442, 457, 458, 473, 474, 489, 490, 505, 10,
+  10,  11,  26,  27,  42,  43,  58,  59,  74,  75,  90,  91,  106, 107, 122,
+  123, 138, 139, 154, 155, 170, 171, 186, 187, 202, 203, 218, 219, 234, 235,
+  250, 251, 266, 267, 282, 283, 298, 299, 314, 315, 330, 331, 346, 347, 362,
+  363, 378, 379, 394, 395, 410, 411, 426, 427, 442, 443, 458, 459, 474, 475,
+  490, 491, 506, 11,  11,  12,  27,  28,  43,  44,  59,  60,  75,  76,  91,
+  92,  107, 108, 123, 124, 139, 140, 155, 156, 171, 172, 187, 188, 203, 204,
+  219, 220, 235, 236, 251, 252, 267, 268, 283, 284, 299, 300, 315, 316, 331,
+  332, 347, 348, 363, 364, 379, 380, 395, 396, 411, 412, 427, 428, 443, 444,
+  459, 460, 475, 476, 491, 492, 507, 12,  12,  13,  28,  29,  44,  45,  60,
+  61,  76,  77,  92,  93,  108, 109, 124, 125, 140, 141, 156, 157, 172, 173,
+  188, 189, 204, 205, 220, 221, 236, 237, 252, 253, 268, 269, 284, 285, 300,
+  301, 316, 317, 332, 333, 348, 349, 364, 365, 380, 381, 396, 397, 412, 413,
+  428, 429, 444, 445, 460, 461, 476, 477, 492, 493, 508, 13,  13,  14,  29,
+  30,  45,  46,  61,  62,  77,  78,  93,  94,  109, 110, 125, 126, 141, 142,
+  157, 158, 173, 174, 189, 190, 205, 206, 221, 222, 237, 238, 253, 254, 269,
+  270, 285, 286, 301, 302, 317, 318, 333, 334, 349, 350, 365, 366, 381, 382,
+  397, 398, 413, 414, 429, 430, 445, 446, 461, 462, 477, 478, 493, 494, 509,
+  14,  14,  15,  30,  31,  46,  47,  62,  63,  78,  79,  94,  95,  110, 111,
+  126, 127, 142, 143, 158, 159, 174, 175, 190, 191, 206, 207, 222, 223, 238,
+  239, 254, 255, 270, 271, 286, 287, 302, 303, 318, 319, 334, 335, 350, 351,
+  366, 367, 382, 383, 398, 399, 414, 415, 430, 431, 446, 447, 462, 463, 478,
+  479, 494, 495, 510, 0,   0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mcol_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   32,  32,  64,  64,  96,  96,  128, 128, 160, 160, 192,
+  192, 224, 224, 256, 256, 288, 288, 320, 320, 352, 352, 384, 384, 416, 416,
+  448, 448, 0,   0,   1,   32,  33,  64,  65,  96,  97,  128, 129, 160, 161,
+  192, 193, 224, 225, 256, 257, 288, 289, 320, 321, 352, 353, 384, 385, 416,
+  417, 448, 449, 480, 1,   1,   2,   33,  34,  65,  66,  97,  98,  129, 130,
+  161, 162, 193, 194, 225, 226, 257, 258, 289, 290, 321, 322, 353, 354, 385,
+  386, 417, 418, 449, 450, 481, 2,   2,   3,   34,  35,  66,  67,  98,  99,
+  130, 131, 162, 163, 194, 195, 226, 227, 258, 259, 290, 291, 322, 323, 354,
+  355, 386, 387, 418, 419, 450, 451, 482, 3,   3,   4,   35,  36,  67,  68,
+  99,  100, 131, 132, 163, 164, 195, 196, 227, 228, 259, 260, 291, 292, 323,
+  324, 355, 356, 387, 388, 419, 420, 451, 452, 483, 4,   4,   5,   36,  37,
+  68,  69,  100, 101, 132, 133, 164, 165, 196, 197, 228, 229, 260, 261, 292,
+  293, 324, 325, 356, 357, 388, 389, 420, 421, 452, 453, 484, 5,   5,   6,
+  37,  38,  69,  70,  101, 102, 133, 134, 165, 166, 197, 198, 229, 230, 261,
+  262, 293, 294, 325, 326, 357, 358, 389, 390, 421, 422, 453, 454, 485, 6,
+  6,   7,   38,  39,  70,  71,  102, 103, 134, 135, 166, 167, 198, 199, 230,
+  231, 262, 263, 294, 295, 326, 327, 358, 359, 390, 391, 422, 423, 454, 455,
+  486, 7,   7,   8,   39,  40,  71,  72,  103, 104, 135, 136, 167, 168, 199,
+  200, 231, 232, 263, 264, 295, 296, 327, 328, 359, 360, 391, 392, 423, 424,
+  455, 456, 487, 8,   8,   9,   40,  41,  72,  73,  104, 105, 136, 137, 168,
+  169, 200, 201, 232, 233, 264, 265, 296, 297, 328, 329, 360, 361, 392, 393,
+  424, 425, 456, 457, 488, 9,   9,   10,  41,  42,  73,  74,  105, 106, 137,
+  138, 169, 170, 201, 202, 233, 234, 265, 266, 297, 298, 329, 330, 361, 362,
+  393, 394, 425, 426, 457, 458, 489, 10,  10,  11,  42,  43,  74,  75,  106,
+  107, 138, 139, 170, 171, 202, 203, 234, 235, 266, 267, 298, 299, 330, 331,
+  362, 363, 394, 395, 426, 427, 458, 459, 490, 11,  11,  12,  43,  44,  75,
+  76,  107, 108, 139, 140, 171, 172, 203, 204, 235, 236, 267, 268, 299, 300,
+  331, 332, 363, 364, 395, 396, 427, 428, 459, 460, 491, 12,  12,  13,  44,
+  45,  76,  77,  108, 109, 140, 141, 172, 173, 204, 205, 236, 237, 268, 269,
+  300, 301, 332, 333, 364, 365, 396, 397, 428, 429, 460, 461, 492, 13,  13,
+  14,  45,  46,  77,  78,  109, 110, 141, 142, 173, 174, 205, 206, 237, 238,
+  269, 270, 301, 302, 333, 334, 365, 366, 397, 398, 429, 430, 461, 462, 493,
+  14,  14,  15,  46,  47,  78,  79,  110, 111, 142, 143, 174, 175, 206, 207,
+  238, 239, 270, 271, 302, 303, 334, 335, 366, 367, 398, 399, 430, 431, 462,
+  463, 494, 15,  15,  16,  47,  48,  79,  80,  111, 112, 143, 144, 175, 176,
+  207, 208, 239, 240, 271, 272, 303, 304, 335, 336, 367, 368, 399, 400, 431,
+  432, 463, 464, 495, 16,  16,  17,  48,  49,  80,  81,  112, 113, 144, 145,
+  176, 177, 208, 209, 240, 241, 272, 273, 304, 305, 336, 337, 368, 369, 400,
+  401, 432, 433, 464, 465, 496, 17,  17,  18,  49,  50,  81,  82,  113, 114,
+  145, 146, 177, 178, 209, 210, 241, 242, 273, 274, 305, 306, 337, 338, 369,
+  370, 401, 402, 433, 434, 465, 466, 497, 18,  18,  19,  50,  51,  82,  83,
+  114, 115, 146, 147, 178, 179, 210, 211, 242, 243, 274, 275, 306, 307, 338,
+  339, 370, 371, 402, 403, 434, 435, 466, 467, 498, 19,  19,  20,  51,  52,
+  83,  84,  115, 116, 147, 148, 179, 180, 211, 212, 243, 244, 275, 276, 307,
+  308, 339, 340, 371, 372, 403, 404, 435, 436, 467, 468, 499, 20,  20,  21,
+  52,  53,  84,  85,  116, 117, 148, 149, 180, 181, 212, 213, 244, 245, 276,
+  277, 308, 309, 340, 341, 372, 373, 404, 405, 436, 437, 468, 469, 500, 21,
+  21,  22,  53,  54,  85,  86,  117, 118, 149, 150, 181, 182, 213, 214, 245,
+  246, 277, 278, 309, 310, 341, 342, 373, 374, 405, 406, 437, 438, 469, 470,
+  501, 22,  22,  23,  54,  55,  86,  87,  118, 119, 150, 151, 182, 183, 214,
+  215, 246, 247, 278, 279, 310, 311, 342, 343, 374, 375, 406, 407, 438, 439,
+  470, 471, 502, 23,  23,  24,  55,  56,  87,  88,  119, 120, 151, 152, 183,
+  184, 215, 216, 247, 248, 279, 280, 311, 312, 343, 344, 375, 376, 407, 408,
+  439, 440, 471, 472, 503, 24,  24,  25,  56,  57,  88,  89,  120, 121, 152,
+  153, 184, 185, 216, 217, 248, 249, 280, 281, 312, 313, 344, 345, 376, 377,
+  408, 409, 440, 441, 472, 473, 504, 25,  25,  26,  57,  58,  89,  90,  121,
+  122, 153, 154, 185, 186, 217, 218, 249, 250, 281, 282, 313, 314, 345, 346,
+  377, 378, 409, 410, 441, 442, 473, 474, 505, 26,  26,  27,  58,  59,  90,
+  91,  122, 123, 154, 155, 186, 187, 218, 219, 250, 251, 282, 283, 314, 315,
+  346, 347, 378, 379, 410, 411, 442, 443, 474, 475, 506, 27,  27,  28,  59,
+  60,  91,  92,  123, 124, 155, 156, 187, 188, 219, 220, 251, 252, 283, 284,
+  315, 316, 347, 348, 379, 380, 411, 412, 443, 444, 475, 476, 507, 28,  28,
+  29,  60,  61,  92,  93,  124, 125, 156, 157, 188, 189, 220, 221, 252, 253,
+  284, 285, 316, 317, 348, 349, 380, 381, 412, 413, 444, 445, 476, 477, 508,
+  29,  29,  30,  61,  62,  93,  94,  125, 126, 157, 158, 189, 190, 221, 222,
+  253, 254, 285, 286, 317, 318, 349, 350, 381, 382, 413, 414, 445, 446, 477,
+  478, 509, 30,  30,  31,  62,  63,  94,  95,  126, 127, 158, 159, 190, 191,
+  222, 223, 254, 255, 286, 287, 318, 319, 350, 351, 382, 383, 414, 415, 446,
+  447, 478, 479, 510, 0,   0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mrow_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   1,   1,   2,   2,   3,   3,   4,   4,   5,   5,   6,
+  6,   7,   7,   8,   8,   9,   9,   10,  10,  11,  11,  12,  12,  13,  13,
+  14,  14,  0,   0,   1,   16,  2,   17,  3,   18,  4,   19,  5,   20,  6,
+  21,  7,   22,  8,   23,  9,   24,  10,  25,  11,  26,  12,  27,  13,  28,
+  14,  29,  15,  30,  16,  16,  17,  32,  18,  33,  19,  34,  20,  35,  21,
+  36,  22,  37,  23,  38,  24,  39,  25,  40,  26,  41,  27,  42,  28,  43,
+  29,  44,  30,  45,  31,  46,  32,  32,  33,  48,  34,  49,  35,  50,  36,
+  51,  37,  52,  38,  53,  39,  54,  40,  55,  41,  56,  42,  57,  43,  58,
+  44,  59,  45,  60,  46,  61,  47,  62,  48,  48,  49,  64,  50,  65,  51,
+  66,  52,  67,  53,  68,  54,  69,  55,  70,  56,  71,  57,  72,  58,  73,
+  59,  74,  60,  75,  61,  76,  62,  77,  63,  78,  64,  64,  65,  80,  66,
+  81,  67,  82,  68,  83,  69,  84,  70,  85,  71,  86,  72,  87,  73,  88,
+  74,  89,  75,  90,  76,  91,  77,  92,  78,  93,  79,  94,  80,  80,  81,
+  96,  82,  97,  83,  98,  84,  99,  85,  100, 86,  101, 87,  102, 88,  103,
+  89,  104, 90,  105, 91,  106, 92,  107, 93,  108, 94,  109, 95,  110, 96,
+  96,  97,  112, 98,  113, 99,  114, 100, 115, 101, 116, 102, 117, 103, 118,
+  104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111,
+  126, 112, 112, 113, 128, 114, 129, 115, 130, 116, 131, 117, 132, 118, 133,
+  119, 134, 120, 135, 121, 136, 122, 137, 123, 138, 124, 139, 125, 140, 126,
+  141, 127, 142, 128, 128, 129, 144, 130, 145, 131, 146, 132, 147, 133, 148,
+  134, 149, 135, 150, 136, 151, 137, 152, 138, 153, 139, 154, 140, 155, 141,
+  156, 142, 157, 143, 158, 144, 144, 145, 160, 146, 161, 147, 162, 148, 163,
+  149, 164, 150, 165, 151, 166, 152, 167, 153, 168, 154, 169, 155, 170, 156,
+  171, 157, 172, 158, 173, 159, 174, 160, 160, 161, 176, 162, 177, 163, 178,
+  164, 179, 165, 180, 166, 181, 167, 182, 168, 183, 169, 184, 170, 185, 171,
+  186, 172, 187, 173, 188, 174, 189, 175, 190, 176, 176, 177, 192, 178, 193,
+  179, 194, 180, 195, 181, 196, 182, 197, 183, 198, 184, 199, 185, 200, 186,
+  201, 187, 202, 188, 203, 189, 204, 190, 205, 191, 206, 192, 192, 193, 208,
+  194, 209, 195, 210, 196, 211, 197, 212, 198, 213, 199, 214, 200, 215, 201,
+  216, 202, 217, 203, 218, 204, 219, 205, 220, 206, 221, 207, 222, 208, 208,
+  209, 224, 210, 225, 211, 226, 212, 227, 213, 228, 214, 229, 215, 230, 216,
+  231, 217, 232, 218, 233, 219, 234, 220, 235, 221, 236, 222, 237, 223, 238,
+  224, 224, 225, 240, 226, 241, 227, 242, 228, 243, 229, 244, 230, 245, 231,
+  246, 232, 247, 233, 248, 234, 249, 235, 250, 236, 251, 237, 252, 238, 253,
+  239, 254, 240, 240, 241, 256, 242, 257, 243, 258, 244, 259, 245, 260, 246,
+  261, 247, 262, 248, 263, 249, 264, 250, 265, 251, 266, 252, 267, 253, 268,
+  254, 269, 255, 270, 256, 256, 257, 272, 258, 273, 259, 274, 260, 275, 261,
+  276, 262, 277, 263, 278, 264, 279, 265, 280, 266, 281, 267, 282, 268, 283,
+  269, 284, 270, 285, 271, 286, 272, 272, 273, 288, 274, 289, 275, 290, 276,
+  291, 277, 292, 278, 293, 279, 294, 280, 295, 281, 296, 282, 297, 283, 298,
+  284, 299, 285, 300, 286, 301, 287, 302, 288, 288, 289, 304, 290, 305, 291,
+  306, 292, 307, 293, 308, 294, 309, 295, 310, 296, 311, 297, 312, 298, 313,
+  299, 314, 300, 315, 301, 316, 302, 317, 303, 318, 304, 304, 305, 320, 306,
+  321, 307, 322, 308, 323, 309, 324, 310, 325, 311, 326, 312, 327, 313, 328,
+  314, 329, 315, 330, 316, 331, 317, 332, 318, 333, 319, 334, 320, 320, 321,
+  336, 322, 337, 323, 338, 324, 339, 325, 340, 326, 341, 327, 342, 328, 343,
+  329, 344, 330, 345, 331, 346, 332, 347, 333, 348, 334, 349, 335, 350, 336,
+  336, 337, 352, 338, 353, 339, 354, 340, 355, 341, 356, 342, 357, 343, 358,
+  344, 359, 345, 360, 346, 361, 347, 362, 348, 363, 349, 364, 350, 365, 351,
+  366, 352, 352, 353, 368, 354, 369, 355, 370, 356, 371, 357, 372, 358, 373,
+  359, 374, 360, 375, 361, 376, 362, 377, 363, 378, 364, 379, 365, 380, 366,
+  381, 367, 382, 368, 368, 369, 384, 370, 385, 371, 386, 372, 387, 373, 388,
+  374, 389, 375, 390, 376, 391, 377, 392, 378, 393, 379, 394, 380, 395, 381,
+  396, 382, 397, 383, 398, 384, 384, 385, 400, 386, 401, 387, 402, 388, 403,
+  389, 404, 390, 405, 391, 406, 392, 407, 393, 408, 394, 409, 395, 410, 396,
+  411, 397, 412, 398, 413, 399, 414, 400, 400, 401, 416, 402, 417, 403, 418,
+  404, 419, 405, 420, 406, 421, 407, 422, 408, 423, 409, 424, 410, 425, 411,
+  426, 412, 427, 413, 428, 414, 429, 415, 430, 416, 416, 417, 432, 418, 433,
+  419, 434, 420, 435, 421, 436, 422, 437, 423, 438, 424, 439, 425, 440, 426,
+  441, 427, 442, 428, 443, 429, 444, 430, 445, 431, 446, 432, 432, 433, 448,
+  434, 449, 435, 450, 436, 451, 437, 452, 438, 453, 439, 454, 440, 455, 441,
+  456, 442, 457, 443, 458, 444, 459, 445, 460, 446, 461, 447, 462, 448, 448,
+  449, 464, 450, 465, 451, 466, 452, 467, 453, 468, 454, 469, 455, 470, 456,
+  471, 457, 472, 458, 473, 459, 474, 460, 475, 461, 476, 462, 477, 463, 478,
+  464, 464, 465, 480, 466, 481, 467, 482, 468, 483, 469, 484, 470, 485, 471,
+  486, 472, 487, 473, 488, 474, 489, 475, 490, 476, 491, 477, 492, 478, 493,
+  479, 494, 480, 480, 481, 496, 482, 497, 483, 498, 484, 499, 485, 500, 486,
+  501, 487, 502, 488, 503, 489, 504, 490, 505, 491, 506, 492, 507, 493, 508,
+  494, 509, 495, 510, 0,   0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mrow_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   1,   1,   2,   2,   3,   3,   4,   4,   5,   5,   6,
+  6,   7,   7,   8,   8,   9,   9,   10,  10,  11,  11,  12,  12,  13,  13,
+  14,  14,  15,  15,  16,  16,  17,  17,  18,  18,  19,  19,  20,  20,  21,
+  21,  22,  22,  23,  23,  24,  24,  25,  25,  26,  26,  27,  27,  28,  28,
+  29,  29,  30,  30,  0,   0,   1,   32,  2,   33,  3,   34,  4,   35,  5,
+  36,  6,   37,  7,   38,  8,   39,  9,   40,  10,  41,  11,  42,  12,  43,
+  13,  44,  14,  45,  15,  46,  16,  47,  17,  48,  18,  49,  19,  50,  20,
+  51,  21,  52,  22,  53,  23,  54,  24,  55,  25,  56,  26,  57,  27,  58,
+  28,  59,  29,  60,  30,  61,  31,  62,  32,  32,  33,  64,  34,  65,  35,
+  66,  36,  67,  37,  68,  38,  69,  39,  70,  40,  71,  41,  72,  42,  73,
+  43,  74,  44,  75,  45,  76,  46,  77,  47,  78,  48,  79,  49,  80,  50,
+  81,  51,  82,  52,  83,  53,  84,  54,  85,  55,  86,  56,  87,  57,  88,
+  58,  89,  59,  90,  60,  91,  61,  92,  62,  93,  63,  94,  64,  64,  65,
+  96,  66,  97,  67,  98,  68,  99,  69,  100, 70,  101, 71,  102, 72,  103,
+  73,  104, 74,  105, 75,  106, 76,  107, 77,  108, 78,  109, 79,  110, 80,
+  111, 81,  112, 82,  113, 83,  114, 84,  115, 85,  116, 86,  117, 87,  118,
+  88,  119, 89,  120, 90,  121, 91,  122, 92,  123, 93,  124, 94,  125, 95,
+  126, 96,  96,  97,  128, 98,  129, 99,  130, 100, 131, 101, 132, 102, 133,
+  103, 134, 104, 135, 105, 136, 106, 137, 107, 138, 108, 139, 109, 140, 110,
+  141, 111, 142, 112, 143, 113, 144, 114, 145, 115, 146, 116, 147, 117, 148,
+  118, 149, 119, 150, 120, 151, 121, 152, 122, 153, 123, 154, 124, 155, 125,
+  156, 126, 157, 127, 158, 128, 128, 129, 160, 130, 161, 131, 162, 132, 163,
+  133, 164, 134, 165, 135, 166, 136, 167, 137, 168, 138, 169, 139, 170, 140,
+  171, 141, 172, 142, 173, 143, 174, 144, 175, 145, 176, 146, 177, 147, 178,
+  148, 179, 149, 180, 150, 181, 151, 182, 152, 183, 153, 184, 154, 185, 155,
+  186, 156, 187, 157, 188, 158, 189, 159, 190, 160, 160, 161, 192, 162, 193,
+  163, 194, 164, 195, 165, 196, 166, 197, 167, 198, 168, 199, 169, 200, 170,
+  201, 171, 202, 172, 203, 173, 204, 174, 205, 175, 206, 176, 207, 177, 208,
+  178, 209, 179, 210, 180, 211, 181, 212, 182, 213, 183, 214, 184, 215, 185,
+  216, 186, 217, 187, 218, 188, 219, 189, 220, 190, 221, 191, 222, 192, 192,
+  193, 224, 194, 225, 195, 226, 196, 227, 197, 228, 198, 229, 199, 230, 200,
+  231, 201, 232, 202, 233, 203, 234, 204, 235, 205, 236, 206, 237, 207, 238,
+  208, 239, 209, 240, 210, 241, 211, 242, 212, 243, 213, 244, 214, 245, 215,
+  246, 216, 247, 217, 248, 218, 249, 219, 250, 220, 251, 221, 252, 222, 253,
+  223, 254, 224, 224, 225, 256, 226, 257, 227, 258, 228, 259, 229, 260, 230,
+  261, 231, 262, 232, 263, 233, 264, 234, 265, 235, 266, 236, 267, 237, 268,
+  238, 269, 239, 270, 240, 271, 241, 272, 242, 273, 243, 274, 244, 275, 245,
+  276, 246, 277, 247, 278, 248, 279, 249, 280, 250, 281, 251, 282, 252, 283,
+  253, 284, 254, 285, 255, 286, 256, 256, 257, 288, 258, 289, 259, 290, 260,
+  291, 261, 292, 262, 293, 263, 294, 264, 295, 265, 296, 266, 297, 267, 298,
+  268, 299, 269, 300, 270, 301, 271, 302, 272, 303, 273, 304, 274, 305, 275,
+  306, 276, 307, 277, 308, 278, 309, 279, 310, 280, 311, 281, 312, 282, 313,
+  283, 314, 284, 315, 285, 316, 286, 317, 287, 318, 288, 288, 289, 320, 290,
+  321, 291, 322, 292, 323, 293, 324, 294, 325, 295, 326, 296, 327, 297, 328,
+  298, 329, 299, 330, 300, 331, 301, 332, 302, 333, 303, 334, 304, 335, 305,
+  336, 306, 337, 307, 338, 308, 339, 309, 340, 310, 341, 311, 342, 312, 343,
+  313, 344, 314, 345, 315, 346, 316, 347, 317, 348, 318, 349, 319, 350, 320,
+  320, 321, 352, 322, 353, 323, 354, 324, 355, 325, 356, 326, 357, 327, 358,
+  328, 359, 329, 360, 330, 361, 331, 362, 332, 363, 333, 364, 334, 365, 335,
+  366, 336, 367, 337, 368, 338, 369, 339, 370, 340, 371, 341, 372, 342, 373,
+  343, 374, 344, 375, 345, 376, 346, 377, 347, 378, 348, 379, 349, 380, 350,
+  381, 351, 382, 352, 352, 353, 384, 354, 385, 355, 386, 356, 387, 357, 388,
+  358, 389, 359, 390, 360, 391, 361, 392, 362, 393, 363, 394, 364, 395, 365,
+  396, 366, 397, 367, 398, 368, 399, 369, 400, 370, 401, 371, 402, 372, 403,
+  373, 404, 374, 405, 375, 406, 376, 407, 377, 408, 378, 409, 379, 410, 380,
+  411, 381, 412, 382, 413, 383, 414, 384, 384, 385, 416, 386, 417, 387, 418,
+  388, 419, 389, 420, 390, 421, 391, 422, 392, 423, 393, 424, 394, 425, 395,
+  426, 396, 427, 397, 428, 398, 429, 399, 430, 400, 431, 401, 432, 402, 433,
+  403, 434, 404, 435, 405, 436, 406, 437, 407, 438, 408, 439, 409, 440, 410,
+  441, 411, 442, 412, 443, 413, 444, 414, 445, 415, 446, 416, 416, 417, 448,
+  418, 449, 419, 450, 420, 451, 421, 452, 422, 453, 423, 454, 424, 455, 425,
+  456, 426, 457, 427, 458, 428, 459, 429, 460, 430, 461, 431, 462, 432, 463,
+  433, 464, 434, 465, 435, 466, 436, 467, 437, 468, 438, 469, 439, 470, 440,
+  471, 441, 472, 442, 473, 443, 474, 444, 475, 445, 476, 446, 477, 447, 478,
+  448, 448, 449, 480, 450, 481, 451, 482, 452, 483, 453, 484, 454, 485, 455,
+  486, 456, 487, 457, 488, 458, 489, 459, 490, 460, 491, 461, 492, 462, 493,
+  463, 494, 464, 495, 465, 496, 466, 497, 467, 498, 468, 499, 469, 500, 470,
+  501, 471, 502, 472, 503, 473, 504, 474, 505, 475, 506, 476, 507, 477, 508,
+  478, 509, 479, 510, 0,   0
+};
+
+#endif  // CONFIG_EXT_TX
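+
+/* Consumption sketch (an assumption, mirroring the get_coef_context()
+ * helper in av1/common/scan.h; reproduced here under a hypothetical name
+ * purely to document the table layout, not as part of this port). For
+ * each scan position c the table stores MAX_NEIGHBORS (= 2) indices of
+ * coefficients that precede c in scan order; the entropy context for c
+ * is the rounded average of the two neighbors' token_cache values. When
+ * only one neighbor exists its index is stored twice, and a trailing
+ * { 0, 0 } pair terminates each table. */
+static int scan_neighbor_context_sketch(const int16_t *neighbors,
+                                        const uint8_t *token_cache, int c) {
+  return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
+          token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
+}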
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+                mcol_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   16,  16,  32,  32,  48,  48,  64,  64,  80,  80,  96,
+  96,  112, 112, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 208, 208,
+  224, 224, 0,   0,   1,   16,  17,  32,  33,  48,  49,  64,  65,  80,  81,
+  96,  97,  112, 113, 128, 129, 144, 145, 160, 161, 176, 177, 192, 193, 208,
+  209, 224, 225, 240, 1,   1,   2,   17,  18,  33,  34,  49,  50,  65,  66,
+  81,  82,  97,  98,  113, 114, 129, 130, 145, 146, 161, 162, 177, 178, 193,
+  194, 209, 210, 225, 226, 241, 2,   2,   3,   18,  19,  34,  35,  50,  51,
+  66,  67,  82,  83,  98,  99,  114, 115, 130, 131, 146, 147, 162, 163, 178,
+  179, 194, 195, 210, 211, 226, 227, 242, 3,   3,   4,   19,  20,  35,  36,
+  51,  52,  67,  68,  83,  84,  99,  100, 115, 116, 131, 132, 147, 148, 163,
+  164, 179, 180, 195, 196, 211, 212, 227, 228, 243, 4,   4,   5,   20,  21,
+  36,  37,  52,  53,  68,  69,  84,  85,  100, 101, 116, 117, 132, 133, 148,
+  149, 164, 165, 180, 181, 196, 197, 212, 213, 228, 229, 244, 5,   5,   6,
+  21,  22,  37,  38,  53,  54,  69,  70,  85,  86,  101, 102, 117, 118, 133,
+  134, 149, 150, 165, 166, 181, 182, 197, 198, 213, 214, 229, 230, 245, 6,
+  6,   7,   22,  23,  38,  39,  54,  55,  70,  71,  86,  87,  102, 103, 118,
+  119, 134, 135, 150, 151, 166, 167, 182, 183, 198, 199, 214, 215, 230, 231,
+  246, 7,   7,   8,   23,  24,  39,  40,  55,  56,  71,  72,  87,  88,  103,
+  104, 119, 120, 135, 136, 151, 152, 167, 168, 183, 184, 199, 200, 215, 216,
+  231, 232, 247, 8,   8,   9,   24,  25,  40,  41,  56,  57,  72,  73,  88,
+  89,  104, 105, 120, 121, 136, 137, 152, 153, 168, 169, 184, 185, 200, 201,
+  216, 217, 232, 233, 248, 9,   9,   10,  25,  26,  41,  42,  57,  58,  73,
+  74,  89,  90,  105, 106, 121, 122, 137, 138, 153, 154, 169, 170, 185, 186,
+  201, 202, 217, 218, 233, 234, 249, 10,  10,  11,  26,  27,  42,  43,  58,
+  59,  74,  75,  90,  91,  106, 107, 122, 123, 138, 139, 154, 155, 170, 171,
+  186, 187, 202, 203, 218, 219, 234, 235, 250, 11,  11,  12,  27,  28,  43,
+  44,  59,  60,  75,  76,  91,  92,  107, 108, 123, 124, 139, 140, 155, 156,
+  171, 172, 187, 188, 203, 204, 219, 220, 235, 236, 251, 12,  12,  13,  28,
+  29,  44,  45,  60,  61,  76,  77,  92,  93,  108, 109, 124, 125, 140, 141,
+  156, 157, 172, 173, 188, 189, 204, 205, 220, 221, 236, 237, 252, 13,  13,
+  14,  29,  30,  45,  46,  61,  62,  77,  78,  93,  94,  109, 110, 125, 126,
+  141, 142, 157, 158, 173, 174, 189, 190, 205, 206, 221, 222, 237, 238, 253,
+  14,  14,  15,  30,  31,  46,  47,  62,  63,  78,  79,  94,  95,  110, 111,
+  126, 127, 142, 143, 158, 159, 174, 175, 190, 191, 206, 207, 222, 223, 238,
+  239, 254, 0,   0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mrow_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   1,   1,   2,   2,   3,   3,   4,   4,   5,   5,   6,
+  6,   7,   7,   8,   8,   9,   9,   10,  10,  11,  11,  12,  12,  13,  13,
+  14,  14,  0,   0,   1,   16,  2,   17,  3,   18,  4,   19,  5,   20,  6,
+  21,  7,   22,  8,   23,  9,   24,  10,  25,  11,  26,  12,  27,  13,  28,
+  14,  29,  15,  30,  16,  16,  17,  32,  18,  33,  19,  34,  20,  35,  21,
+  36,  22,  37,  23,  38,  24,  39,  25,  40,  26,  41,  27,  42,  28,  43,
+  29,  44,  30,  45,  31,  46,  32,  32,  33,  48,  34,  49,  35,  50,  36,
+  51,  37,  52,  38,  53,  39,  54,  40,  55,  41,  56,  42,  57,  43,  58,
+  44,  59,  45,  60,  46,  61,  47,  62,  48,  48,  49,  64,  50,  65,  51,
+  66,  52,  67,  53,  68,  54,  69,  55,  70,  56,  71,  57,  72,  58,  73,
+  59,  74,  60,  75,  61,  76,  62,  77,  63,  78,  64,  64,  65,  80,  66,
+  81,  67,  82,  68,  83,  69,  84,  70,  85,  71,  86,  72,  87,  73,  88,
+  74,  89,  75,  90,  76,  91,  77,  92,  78,  93,  79,  94,  80,  80,  81,
+  96,  82,  97,  83,  98,  84,  99,  85,  100, 86,  101, 87,  102, 88,  103,
+  89,  104, 90,  105, 91,  106, 92,  107, 93,  108, 94,  109, 95,  110, 96,
+  96,  97,  112, 98,  113, 99,  114, 100, 115, 101, 116, 102, 117, 103, 118,
+  104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111,
+  126, 112, 112, 113, 128, 114, 129, 115, 130, 116, 131, 117, 132, 118, 133,
+  119, 134, 120, 135, 121, 136, 122, 137, 123, 138, 124, 139, 125, 140, 126,
+  141, 127, 142, 128, 128, 129, 144, 130, 145, 131, 146, 132, 147, 133, 148,
+  134, 149, 135, 150, 136, 151, 137, 152, 138, 153, 139, 154, 140, 155, 141,
+  156, 142, 157, 143, 158, 144, 144, 145, 160, 146, 161, 147, 162, 148, 163,
+  149, 164, 150, 165, 151, 166, 152, 167, 153, 168, 154, 169, 155, 170, 156,
+  171, 157, 172, 158, 173, 159, 174, 160, 160, 161, 176, 162, 177, 163, 178,
+  164, 179, 165, 180, 166, 181, 167, 182, 168, 183, 169, 184, 170, 185, 171,
+  186, 172, 187, 173, 188, 174, 189, 175, 190, 176, 176, 177, 192, 178, 193,
+  179, 194, 180, 195, 181, 196, 182, 197, 183, 198, 184, 199, 185, 200, 186,
+  201, 187, 202, 188, 203, 189, 204, 190, 205, 191, 206, 192, 192, 193, 208,
+  194, 209, 195, 210, 196, 211, 197, 212, 198, 213, 199, 214, 200, 215, 201,
+  216, 202, 217, 203, 218, 204, 219, 205, 220, 206, 221, 207, 222, 208, 208,
+  209, 224, 210, 225, 211, 226, 212, 227, 213, 228, 214, 229, 215, 230, 216,
+  231, 217, 232, 218, 233, 219, 234, 220, 235, 221, 236, 222, 237, 223, 238,
+  224, 224, 225, 240, 226, 241, 227, 242, 228, 243, 229, 244, 230, 245, 231,
+  246, 232, 247, 233, 248, 234, 249, 235, 250, 236, 251, 237, 252, 238, 253,
+  239, 254, 0,   0,
+};
+#endif  // CONFIG_EXT_TX
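+
+/* The mcol_* / mrow_* tables are compiled only under CONFIG_EXT_TX and
+ * encode pure column-major and row-major scan orders (an assumption:
+ * these back the 1-D transform types that EXT_TX introduces). The
+ * col_*, row_* and default_* tables that follow are built
+ * unconditionally. */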
+
+DECLARE_ALIGNED(16, static const int16_t,
+                col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   16,  16,  32,  32,  16,  0,   48,  48,  1,   16,  64,
+  64,  17,  32,  80,  80,  33,  48,  17,  1,   49,  64,  96,  96,  2,   17,
+  65,  80,  18,  33,  112, 112, 34,  49,  81,  96,  18,  2,   50,  65,  128,
+  128, 3,   18,  97,  112, 19,  34,  66,  81,  144, 144, 82,  97,  35,  50,
+  113, 128, 19,  3,   51,  66,  160, 160, 4,   19,  98,  113, 129, 144, 67,
+  82,  20,  35,  83,  98,  114, 129, 36,  51,  176, 176, 20,  4,   145, 160,
+  52,  67,  99,  114, 5,   20,  130, 145, 68,  83,  192, 192, 161, 176, 21,
+  36,  115, 130, 84,  99,  37,  52,  146, 161, 208, 208, 53,  68,  21,  5,
+  100, 115, 177, 192, 131, 146, 69,  84,  6,   21,  224, 224, 116, 131, 22,
+  37,  162, 177, 85,  100, 147, 162, 38,  53,  193, 208, 101, 116, 54,  69,
+  22,  6,   132, 147, 178, 193, 70,  85,  163, 178, 209, 224, 7,   22,  117,
+  132, 23,  38,  148, 163, 23,  7,   86,  101, 194, 209, 225, 240, 39,  54,
+  179, 194, 102, 117, 133, 148, 55,  70,  164, 179, 8,   23,  71,  86,  210,
+  225, 118, 133, 149, 164, 195, 210, 24,  39,  87,  102, 40,  55,  56,  71,
+  134, 149, 180, 195, 226, 241, 103, 118, 24,  8,   165, 180, 211, 226, 72,
+  87,  150, 165, 9,   24,  119, 134, 25,  40,  88,  103, 196, 211, 41,  56,
+  135, 150, 181, 196, 104, 119, 57,  72,  227, 242, 166, 181, 120, 135, 151,
+  166, 197, 212, 73,  88,  25,  9,   212, 227, 89,  104, 136, 151, 182, 197,
+  10,  25,  26,  41,  105, 120, 167, 182, 228, 243, 152, 167, 42,  57,  121,
+  136, 213, 228, 58,  73,  198, 213, 74,  89,  137, 152, 183, 198, 168, 183,
+  26,  10,  90,  105, 229, 244, 11,  26,  106, 121, 214, 229, 153, 168, 27,
+  42,  199, 214, 43,  58,  184, 199, 122, 137, 169, 184, 230, 245, 59,  74,
+  27,  11,  75,  90,  138, 153, 200, 215, 215, 230, 91,  106, 12,  27,  28,
+  43,  185, 200, 107, 122, 154, 169, 44,  59,  231, 246, 216, 231, 60,  75,
+  123, 138, 28,  12,  76,  91,  201, 216, 170, 185, 232, 247, 139, 154, 92,
+  107, 13,  28,  108, 123, 29,  44,  186, 201, 217, 232, 155, 170, 45,  60,
+  29,  13,  61,  76,  124, 139, 14,  14,  233, 248, 77,  92,  14,  29,  171,
+  186, 140, 155, 202, 217, 30,  45,  93,  108, 109, 124, 46,  61,  156, 171,
+  62,  77,  187, 202, 15,  30,  125, 140, 218, 233, 78,  93,  31,  46,  172,
+  187, 47,  62,  141, 156, 94,  109, 234, 249, 203, 218, 63,  78,  110, 125,
+  188, 203, 157, 172, 126, 141, 79,  94,  173, 188, 95,  110, 219, 234, 142,
+  157, 204, 219, 235, 250, 111, 126, 158, 173, 127, 142, 189, 204, 220, 235,
+  143, 158, 174, 189, 205, 220, 236, 251, 159, 174, 190, 205, 221, 236, 175,
+  190, 237, 252, 206, 221, 222, 237, 191, 206, 238, 253, 207, 222, 223, 238,
+  239, 254, 0,   0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   1,   1,   0,   1,   2,   2,   1,   16,  3,   3,   2,
+  17,  16,  17,  4,   4,   17,  32,  3,   18,  5,   5,   18,  33,  32,  33,
+  4,   19,  33,  48,  6,   6,   19,  34,  5,   20,  34,  49,  48,  49,  7,
+  7,   20,  35,  49,  64,  6,   21,  35,  50,  21,  36,  64,  65,  8,   8,
+  50,  65,  36,  51,  7,   22,  22,  37,  65,  80,  51,  66,  9,   9,   37,
+  52,  8,   23,  66,  81,  52,  67,  80,  81,  23,  38,  10,  10,  38,  53,
+  67,  82,  81,  96,  53,  68,  9,   24,  82,  97,  68,  83,  24,  39,  96,
+  97,  39,  54,  11,  11,  54,  69,  83,  98,  97,  112, 69,  84,  10,  25,
+  25,  40,  40,  55,  98,  113, 84,  99,  12,  12,  55,  70,  112, 113, 70,
+  85,  11,  26,  99,  114, 85,  100, 113, 128, 26,  41,  41,  56,  56,  71,
+  100, 115, 13,  13,  71,  86,  114, 129, 86,  101, 128, 129, 57,  72,  115,
+  130, 101, 116, 12,  27,  42,  57,  14,  14,  72,  87,  27,  42,  129, 144,
+  87,  102, 116, 131, 130, 145, 102, 117, 58,  73,  144, 145, 73,  88,  117,
+  132, 88,  103, 13,  28,  43,  58,  131, 146, 103, 118, 28,  43,  145, 160,
+  132, 147, 74,  89,  89,  104, 118, 133, 146, 161, 104, 119, 160, 161, 59,
+  74,  119, 134, 133, 148, 14,  29,  44,  59,  147, 162, 161, 176, 29,  44,
+  105, 120, 75,  90,  90,  105, 148, 163, 162, 177, 134, 149, 176, 177, 120,
+  135, 149, 164, 163, 178, 15,  30,  135, 150, 177, 192, 60,  75,  106, 121,
+  45,  60,  121, 136, 178, 193, 91,  106, 136, 151, 164, 179, 192, 193, 30,
+  45,  150, 165, 151, 166, 179, 194, 76,  91,  165, 180, 122, 137, 193, 208,
+  107, 122, 137, 152, 208, 209, 180, 195, 61,  76,  152, 167, 194, 209, 166,
+  181, 224, 224, 92,  107, 181, 196, 46,  61,  138, 153, 209, 224, 167, 182,
+  153, 168, 195, 210, 31,  46,  123, 138, 77,  92,  168, 183, 210, 225, 196,
+  211, 225, 240, 182, 197, 154, 169, 108, 123, 139, 154, 183, 198, 62,  77,
+  197, 212, 169, 184, 93,  108, 211, 226, 184, 199, 47,  62,  212, 227, 226,
+  241, 124, 139, 198, 213, 155, 170, 170, 185, 140, 155, 213, 228, 227, 242,
+  109, 124, 78,  93,  185, 200, 228, 243, 199, 214, 200, 215, 214, 229, 125,
+  140, 171, 186, 186, 201, 63,  78,  156, 171, 94,  109, 141, 156, 229, 244,
+  201, 216, 215, 230, 79,  94,  230, 245, 216, 231, 110, 125, 187, 202, 231,
+  246, 217, 232, 157, 172, 202, 217, 126, 141, 95,  110, 142, 157, 172, 187,
+  232, 247, 111, 126, 218, 233, 203, 218, 233, 248, 173, 188, 188, 203, 127,
+  142, 158, 173, 143, 158, 234, 249, 219, 234, 189, 204, 204, 219, 159, 174,
+  174, 189, 235, 250, 205, 220, 175, 190, 190, 205, 220, 235, 191, 206, 221,
+  236, 236, 251, 206, 221, 237, 252, 207, 222, 222, 237, 223, 238, 238, 253,
+  239, 254, 0,   0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
+  0,   0,   0,   0,   16,  0,   16,  16,  1,   16,  17,  1,   32,  32,  17,
+  32,  2,   17,  18,  2,   48,  48,  18,  33,  33,  48,  3,   18,  49,  64,
+  64,  65,  34,  49,  19,  3,   19,  34,  50,  65,  4,   19,  65,  80,  80,
+  81,  35,  50,  20,  4,   20,  35,  66,  81,  81,  96,  51,  66,  96,  97,
+  5,   20,  36,  51,  82,  97,  21,  36,  67,  82,  97,  112, 21,  5,   52,
+  67,  112, 113, 37,  52,  6,   21,  83,  98,  98,  113, 68,  83,  22,  6,
+  113, 128, 22,  37,  53,  68,  84,  99,  99,  114, 128, 129, 114, 129, 69,
+  84,  38,  53,  7,   22,  23,  7,   129, 144, 23,  38,  54,  69,  100, 115,
+  85,  100, 115, 130, 144, 145, 130, 145, 39,  54,  70,  85,  8,   23,  55,
+  70,  116, 131, 101, 116, 145, 160, 24,  39,  24,  8,   86,  101, 131, 146,
+  160, 161, 146, 161, 71,  86,  40,  55,  9,   24,  117, 132, 102, 117, 161,
+  176, 132, 147, 56,  71,  87,  102, 25,  40,  147, 162, 25,  9,   176, 177,
+  162, 177, 72,  87,  41,  56,  118, 133, 133, 148, 103, 118, 10,  25,  148,
+  163, 57,  72,  88,  103, 177, 192, 26,  41,  163, 178, 192, 193, 26,  10,
+  119, 134, 73,  88,  149, 164, 104, 119, 134, 149, 42,  57,  178, 193, 164,
+  179, 11,  26,  58,  73,  193, 208, 89,  104, 135, 150, 120, 135, 27,  42,
+  74,  89,  208, 209, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43,
+  58,  27,  11,  136, 151, 90,  105, 151, 166, 180, 195, 59,  74,  121, 136,
+  209, 224, 195, 210, 224, 225, 166, 181, 106, 121, 75,  90,  12,  27,  181,
+  196, 28,  12,  210, 225, 152, 167, 167, 182, 137, 152, 28,  43,  196, 211,
+  122, 137, 91,  106, 225, 240, 44,  59,  13,  28,  107, 122, 182, 197, 168,
+  183, 211, 226, 153, 168, 226, 241, 60,  75,  197, 212, 138, 153, 29,  44,
+  76,  91,  29,  13,  183, 198, 123, 138, 45,  60,  212, 227, 198, 213, 154,
+  169, 169, 184, 227, 242, 92,  107, 61,  76,  139, 154, 14,  29,  30,  14,
+  184, 199, 213, 228, 108, 123, 199, 214, 228, 243, 77,  92,  30,  45,  170,
+  185, 155, 170, 185, 200, 93,  108, 124, 139, 214, 229, 46,  61,  200, 215,
+  229, 244, 15,  30,  109, 124, 62,  77,  140, 155, 215, 230, 31,  46,  171,
+  186, 186, 201, 201, 216, 78,  93,  230, 245, 125, 140, 47,  62,  216, 231,
+  156, 171, 94,  109, 231, 246, 141, 156, 63,  78,  202, 217, 187, 202, 110,
+  125, 217, 232, 172, 187, 232, 247, 79,  94,  157, 172, 126, 141, 203, 218,
+  95,  110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, 234,
+  249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, 250,
+  174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, 236,
+  251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, 238,
+  239, 254, 0,   0,
+};
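+
+/* Layout note: every WxH neighbor table holds one index pair per scan
+ * position plus one trailing { 0, 0 } pair, which is why the
+ * declarations are sized (W * H + 1) * MAX_NEIGHBORS: 257 pairs for the
+ * 16x16 tables above, 513 for 16x32/32x16, and 1025 for the 32x32
+ * tables below. */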
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+                mcol_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+  0,   0,    0,   0,    32,  32,   64,  64,   96,  96,   128, 128,  160, 160,
+  192, 192,  224, 224,  256, 256,  288, 288,  320, 320,  352, 352,  384, 384,
+  416, 416,  448, 448,  480, 480,  512, 512,  544, 544,  576, 576,  608, 608,
+  640, 640,  672, 672,  704, 704,  736, 736,  768, 768,  800, 800,  832, 832,
+  864, 864,  896, 896,  928, 928,  960, 960,  0,   0,    1,   32,   33,  64,
+  65,  96,   97,  128,  129, 160,  161, 192,  193, 224,  225, 256,  257, 288,
+  289, 320,  321, 352,  353, 384,  385, 416,  417, 448,  449, 480,  481, 512,
+  513, 544,  545, 576,  577, 608,  609, 640,  641, 672,  673, 704,  705, 736,
+  737, 768,  769, 800,  801, 832,  833, 864,  865, 896,  897, 928,  929, 960,
+  961, 992,  1,   1,    2,   33,   34,  65,   66,  97,   98,  129,  130, 161,
+  162, 193,  194, 225,  226, 257,  258, 289,  290, 321,  322, 353,  354, 385,
+  386, 417,  418, 449,  450, 481,  482, 513,  514, 545,  546, 577,  578, 609,
+  610, 641,  642, 673,  674, 705,  706, 737,  738, 769,  770, 801,  802, 833,
+  834, 865,  866, 897,  898, 929,  930, 961,  962, 993,  2,   2,    3,   34,
+  35,  66,   67,  98,   99,  130,  131, 162,  163, 194,  195, 226,  227, 258,
+  259, 290,  291, 322,  323, 354,  355, 386,  387, 418,  419, 450,  451, 482,
+  483, 514,  515, 546,  547, 578,  579, 610,  611, 642,  643, 674,  675, 706,
+  707, 738,  739, 770,  771, 802,  803, 834,  835, 866,  867, 898,  899, 930,
+  931, 962,  963, 994,  3,   3,    4,   35,   36,  67,   68,  99,   100, 131,
+  132, 163,  164, 195,  196, 227,  228, 259,  260, 291,  292, 323,  324, 355,
+  356, 387,  388, 419,  420, 451,  452, 483,  484, 515,  516, 547,  548, 579,
+  580, 611,  612, 643,  644, 675,  676, 707,  708, 739,  740, 771,  772, 803,
+  804, 835,  836, 867,  868, 899,  900, 931,  932, 963,  964, 995,  4,   4,
+  5,   36,   37,  68,   69,  100,  101, 132,  133, 164,  165, 196,  197, 228,
+  229, 260,  261, 292,  293, 324,  325, 356,  357, 388,  389, 420,  421, 452,
+  453, 484,  485, 516,  517, 548,  549, 580,  581, 612,  613, 644,  645, 676,
+  677, 708,  709, 740,  741, 772,  773, 804,  805, 836,  837, 868,  869, 900,
+  901, 932,  933, 964,  965, 996,  5,   5,    6,   37,   38,  69,   70,  101,
+  102, 133,  134, 165,  166, 197,  198, 229,  230, 261,  262, 293,  294, 325,
+  326, 357,  358, 389,  390, 421,  422, 453,  454, 485,  486, 517,  518, 549,
+  550, 581,  582, 613,  614, 645,  646, 677,  678, 709,  710, 741,  742, 773,
+  774, 805,  806, 837,  838, 869,  870, 901,  902, 933,  934, 965,  966, 997,
+  6,   6,    7,   38,   39,  70,   71,  102,  103, 134,  135, 166,  167, 198,
+  199, 230,  231, 262,  263, 294,  295, 326,  327, 358,  359, 390,  391, 422,
+  423, 454,  455, 486,  487, 518,  519, 550,  551, 582,  583, 614,  615, 646,
+  647, 678,  679, 710,  711, 742,  743, 774,  775, 806,  807, 838,  839, 870,
+  871, 902,  903, 934,  935, 966,  967, 998,  7,   7,    8,   39,   40,  71,
+  72,  103,  104, 135,  136, 167,  168, 199,  200, 231,  232, 263,  264, 295,
+  296, 327,  328, 359,  360, 391,  392, 423,  424, 455,  456, 487,  488, 519,
+  520, 551,  552, 583,  584, 615,  616, 647,  648, 679,  680, 711,  712, 743,
+  744, 775,  776, 807,  808, 839,  840, 871,  872, 903,  904, 935,  936, 967,
+  968, 999,  8,   8,    9,   40,   41,  72,   73,  104,  105, 136,  137, 168,
+  169, 200,  201, 232,  233, 264,  265, 296,  297, 328,  329, 360,  361, 392,
+  393, 424,  425, 456,  457, 488,  489, 520,  521, 552,  553, 584,  585, 616,
+  617, 648,  649, 680,  681, 712,  713, 744,  745, 776,  777, 808,  809, 840,
+  841, 872,  873, 904,  905, 936,  937, 968,  969, 1000, 9,   9,    10,  41,
+  42,  73,   74,  105,  106, 137,  138, 169,  170, 201,  202, 233,  234, 265,
+  266, 297,  298, 329,  330, 361,  362, 393,  394, 425,  426, 457,  458, 489,
+  490, 521,  522, 553,  554, 585,  586, 617,  618, 649,  650, 681,  682, 713,
+  714, 745,  746, 777,  778, 809,  810, 841,  842, 873,  874, 905,  906, 937,
+  938, 969,  970, 1001, 10,  10,   11,  42,   43,  74,   75,  106,  107, 138,
+  139, 170,  171, 202,  203, 234,  235, 266,  267, 298,  299, 330,  331, 362,
+  363, 394,  395, 426,  427, 458,  459, 490,  491, 522,  523, 554,  555, 586,
+  587, 618,  619, 650,  651, 682,  683, 714,  715, 746,  747, 778,  779, 810,
+  811, 842,  843, 874,  875, 906,  907, 938,  939, 970,  971, 1002, 11,  11,
+  12,  43,   44,  75,   76,  107,  108, 139,  140, 171,  172, 203,  204, 235,
+  236, 267,  268, 299,  300, 331,  332, 363,  364, 395,  396, 427,  428, 459,
+  460, 491,  492, 523,  524, 555,  556, 587,  588, 619,  620, 651,  652, 683,
+  684, 715,  716, 747,  748, 779,  780, 811,  812, 843,  844, 875,  876, 907,
+  908, 939,  940, 971,  972, 1003, 12,  12,   13,  44,   45,  76,   77,  108,
+  109, 140,  141, 172,  173, 204,  205, 236,  237, 268,  269, 300,  301, 332,
+  333, 364,  365, 396,  397, 428,  429, 460,  461, 492,  493, 524,  525, 556,
+  557, 588,  589, 620,  621, 652,  653, 684,  685, 716,  717, 748,  749, 780,
+  781, 812,  813, 844,  845, 876,  877, 908,  909, 940,  941, 972,  973, 1004,
+  13,  13,   14,  45,   46,  77,   78,  109,  110, 141,  142, 173,  174, 205,
+  206, 237,  238, 269,  270, 301,  302, 333,  334, 365,  366, 397,  398, 429,
+  430, 461,  462, 493,  494, 525,  526, 557,  558, 589,  590, 621,  622, 653,
+  654, 685,  686, 717,  718, 749,  750, 781,  782, 813,  814, 845,  846, 877,
+  878, 909,  910, 941,  942, 973,  974, 1005, 14,  14,   15,  46,   47,  78,
+  79,  110,  111, 142,  143, 174,  175, 206,  207, 238,  239, 270,  271, 302,
+  303, 334,  335, 366,  367, 398,  399, 430,  431, 462,  463, 494,  495, 526,
+  527, 558,  559, 590,  591, 622,  623, 654,  655, 686,  687, 718,  719, 750,
+  751, 782,  783, 814,  815, 846,  847, 878,  879, 910,  911, 942,  943, 974,
+  975, 1006, 15,  15,   16,  47,   48,  79,   80,  111,  112, 143,  144, 175,
+  176, 207,  208, 239,  240, 271,  272, 303,  304, 335,  336, 367,  368, 399,
+  400, 431,  432, 463,  464, 495,  496, 527,  528, 559,  560, 591,  592, 623,
+  624, 655,  656, 687,  688, 719,  720, 751,  752, 783,  784, 815,  816, 847,
+  848, 879,  880, 911,  912, 943,  944, 975,  976, 1007, 16,  16,   17,  48,
+  49,  80,   81,  112,  113, 144,  145, 176,  177, 208,  209, 240,  241, 272,
+  273, 304,  305, 336,  337, 368,  369, 400,  401, 432,  433, 464,  465, 496,
+  497, 528,  529, 560,  561, 592,  593, 624,  625, 656,  657, 688,  689, 720,
+  721, 752,  753, 784,  785, 816,  817, 848,  849, 880,  881, 912,  913, 944,
+  945, 976,  977, 1008, 17,  17,   18,  49,   50,  81,   82,  113,  114, 145,
+  146, 177,  178, 209,  210, 241,  242, 273,  274, 305,  306, 337,  338, 369,
+  370, 401,  402, 433,  434, 465,  466, 497,  498, 529,  530, 561,  562, 593,
+  594, 625,  626, 657,  658, 689,  690, 721,  722, 753,  754, 785,  786, 817,
+  818, 849,  850, 881,  882, 913,  914, 945,  946, 977,  978, 1009, 18,  18,
+  19,  50,   51,  82,   83,  114,  115, 146,  147, 178,  179, 210,  211, 242,
+  243, 274,  275, 306,  307, 338,  339, 370,  371, 402,  403, 434,  435, 466,
+  467, 498,  499, 530,  531, 562,  563, 594,  595, 626,  627, 658,  659, 690,
+  691, 722,  723, 754,  755, 786,  787, 818,  819, 850,  851, 882,  883, 914,
+  915, 946,  947, 978,  979, 1010, 19,  19,   20,  51,   52,  83,   84,  115,
+  116, 147,  148, 179,  180, 211,  212, 243,  244, 275,  276, 307,  308, 339,
+  340, 371,  372, 403,  404, 435,  436, 467,  468, 499,  500, 531,  532, 563,
+  564, 595,  596, 627,  628, 659,  660, 691,  692, 723,  724, 755,  756, 787,
+  788, 819,  820, 851,  852, 883,  884, 915,  916, 947,  948, 979,  980, 1011,
+  20,  20,   21,  52,   53,  84,   85,  116,  117, 148,  149, 180,  181, 212,
+  213, 244,  245, 276,  277, 308,  309, 340,  341, 372,  373, 404,  405, 436,
+  437, 468,  469, 500,  501, 532,  533, 564,  565, 596,  597, 628,  629, 660,
+  661, 692,  693, 724,  725, 756,  757, 788,  789, 820,  821, 852,  853, 884,
+  885, 916,  917, 948,  949, 980,  981, 1012, 21,  21,   22,  53,   54,  85,
+  86,  117,  118, 149,  150, 181,  182, 213,  214, 245,  246, 277,  278, 309,
+  310, 341,  342, 373,  374, 405,  406, 437,  438, 469,  470, 501,  502, 533,
+  534, 565,  566, 597,  598, 629,  630, 661,  662, 693,  694, 725,  726, 757,
+  758, 789,  790, 821,  822, 853,  854, 885,  886, 917,  918, 949,  950, 981,
+  982, 1013, 22,  22,   23,  54,   55,  86,   87,  118,  119, 150,  151, 182,
+  183, 214,  215, 246,  247, 278,  279, 310,  311, 342,  343, 374,  375, 406,
+  407, 438,  439, 470,  471, 502,  503, 534,  535, 566,  567, 598,  599, 630,
+  631, 662,  663, 694,  695, 726,  727, 758,  759, 790,  791, 822,  823, 854,
+  855, 886,  887, 918,  919, 950,  951, 982,  983, 1014, 23,  23,   24,  55,
+  56,  87,   88,  119,  120, 151,  152, 183,  184, 215,  216, 247,  248, 279,
+  280, 311,  312, 343,  344, 375,  376, 407,  408, 439,  440, 471,  472, 503,
+  504, 535,  536, 567,  568, 599,  600, 631,  632, 663,  664, 695,  696, 727,
+  728, 759,  760, 791,  792, 823,  824, 855,  856, 887,  888, 919,  920, 951,
+  952, 983,  984, 1015, 24,  24,   25,  56,   57,  88,   89,  120,  121, 152,
+  153, 184,  185, 216,  217, 248,  249, 280,  281, 312,  313, 344,  345, 376,
+  377, 408,  409, 440,  441, 472,  473, 504,  505, 536,  537, 568,  569, 600,
+  601, 632,  633, 664,  665, 696,  697, 728,  729, 760,  761, 792,  793, 824,
+  825, 856,  857, 888,  889, 920,  921, 952,  953, 984,  985, 1016, 25,  25,
+  26,  57,   58,  89,   90,  121,  122, 153,  154, 185,  186, 217,  218, 249,
+  250, 281,  282, 313,  314, 345,  346, 377,  378, 409,  410, 441,  442, 473,
+  474, 505,  506, 537,  538, 569,  570, 601,  602, 633,  634, 665,  666, 697,
+  698, 729,  730, 761,  762, 793,  794, 825,  826, 857,  858, 889,  890, 921,
+  922, 953,  954, 985,  986, 1017, 26,  26,   27,  58,   59,  90,   91,  122,
+  123, 154,  155, 186,  187, 218,  219, 250,  251, 282,  283, 314,  315, 346,
+  347, 378,  379, 410,  411, 442,  443, 474,  475, 506,  507, 538,  539, 570,
+  571, 602,  603, 634,  635, 666,  667, 698,  699, 730,  731, 762,  763, 794,
+  795, 826,  827, 858,  859, 890,  891, 922,  923, 954,  955, 986,  987, 1018,
+  27,  27,   28,  59,   60,  91,   92,  123,  124, 155,  156, 187,  188, 219,
+  220, 251,  252, 283,  284, 315,  316, 347,  348, 379,  380, 411,  412, 443,
+  444, 475,  476, 507,  508, 539,  540, 571,  572, 603,  604, 635,  636, 667,
+  668, 699,  700, 731,  732, 763,  764, 795,  796, 827,  828, 859,  860, 891,
+  892, 923,  924, 955,  956, 987,  988, 1019, 28,  28,   29,  60,   61,  92,
+  93,  124,  125, 156,  157, 188,  189, 220,  221, 252,  253, 284,  285, 316,
+  317, 348,  349, 380,  381, 412,  413, 444,  445, 476,  477, 508,  509, 540,
+  541, 572,  573, 604,  605, 636,  637, 668,  669, 700,  701, 732,  733, 764,
+  765, 796,  797, 828,  829, 860,  861, 892,  893, 924,  925, 956,  957, 988,
+  989, 1020, 29,  29,   30,  61,   62,  93,   94,  125,  126, 157,  158, 189,
+  190, 221,  222, 253,  254, 285,  286, 317,  318, 349,  350, 381,  382, 413,
+  414, 445,  446, 477,  478, 509,  510, 541,  542, 573,  574, 605,  606, 637,
+  638, 669,  670, 701,  702, 733,  734, 765,  766, 797,  798, 829,  830, 861,
+  862, 893,  894, 925,  926, 957,  958, 989,  990, 1021, 30,  30,   31,  62,
+  63,  94,   95,  126,  127, 158,  159, 190,  191, 222,  223, 254,  255, 286,
+  287, 318,  319, 350,  351, 382,  383, 414,  415, 446,  447, 478,  479, 510,
+  511, 542,  543, 574,  575, 606,  607, 638,  639, 670,  671, 702,  703, 734,
+  735, 766,  767, 798,  799, 830,  831, 862,  863, 894,  895, 926,  927, 958,
+  959, 990,  991, 1022, 0,   0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                mrow_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+  0,   0,    0,   0,    1,   1,    2,   2,    3,   3,    4,   4,    5,   5,
+  6,   6,    7,   7,    8,   8,    9,   9,    10,  10,   11,  11,   12,  12,
+  13,  13,   14,  14,   15,  15,   16,  16,   17,  17,   18,  18,   19,  19,
+  20,  20,   21,  21,   22,  22,   23,  23,   24,  24,   25,  25,   26,  26,
+  27,  27,   28,  28,   29,  29,   30,  30,   0,   0,    1,   32,   2,   33,
+  3,   34,   4,   35,   5,   36,   6,   37,   7,   38,   8,   39,   9,   40,
+  10,  41,   11,  42,   12,  43,   13,  44,   14,  45,   15,  46,   16,  47,
+  17,  48,   18,  49,   19,  50,   20,  51,   21,  52,   22,  53,   23,  54,
+  24,  55,   25,  56,   26,  57,   27,  58,   28,  59,   29,  60,   30,  61,
+  31,  62,   32,  32,   33,  64,   34,  65,   35,  66,   36,  67,   37,  68,
+  38,  69,   39,  70,   40,  71,   41,  72,   42,  73,   43,  74,   44,  75,
+  45,  76,   46,  77,   47,  78,   48,  79,   49,  80,   50,  81,   51,  82,
+  52,  83,   53,  84,   54,  85,   55,  86,   56,  87,   57,  88,   58,  89,
+  59,  90,   60,  91,   61,  92,   62,  93,   63,  94,   64,  64,   65,  96,
+  66,  97,   67,  98,   68,  99,   69,  100,  70,  101,  71,  102,  72,  103,
+  73,  104,  74,  105,  75,  106,  76,  107,  77,  108,  78,  109,  79,  110,
+  80,  111,  81,  112,  82,  113,  83,  114,  84,  115,  85,  116,  86,  117,
+  87,  118,  88,  119,  89,  120,  90,  121,  91,  122,  92,  123,  93,  124,
+  94,  125,  95,  126,  96,  96,   97,  128,  98,  129,  99,  130,  100, 131,
+  101, 132,  102, 133,  103, 134,  104, 135,  105, 136,  106, 137,  107, 138,
+  108, 139,  109, 140,  110, 141,  111, 142,  112, 143,  113, 144,  114, 145,
+  115, 146,  116, 147,  117, 148,  118, 149,  119, 150,  120, 151,  121, 152,
+  122, 153,  123, 154,  124, 155,  125, 156,  126, 157,  127, 158,  128, 128,
+  129, 160,  130, 161,  131, 162,  132, 163,  133, 164,  134, 165,  135, 166,
+  136, 167,  137, 168,  138, 169,  139, 170,  140, 171,  141, 172,  142, 173,
+  143, 174,  144, 175,  145, 176,  146, 177,  147, 178,  148, 179,  149, 180,
+  150, 181,  151, 182,  152, 183,  153, 184,  154, 185,  155, 186,  156, 187,
+  157, 188,  158, 189,  159, 190,  160, 160,  161, 192,  162, 193,  163, 194,
+  164, 195,  165, 196,  166, 197,  167, 198,  168, 199,  169, 200,  170, 201,
+  171, 202,  172, 203,  173, 204,  174, 205,  175, 206,  176, 207,  177, 208,
+  178, 209,  179, 210,  180, 211,  181, 212,  182, 213,  183, 214,  184, 215,
+  185, 216,  186, 217,  187, 218,  188, 219,  189, 220,  190, 221,  191, 222,
+  192, 192,  193, 224,  194, 225,  195, 226,  196, 227,  197, 228,  198, 229,
+  199, 230,  200, 231,  201, 232,  202, 233,  203, 234,  204, 235,  205, 236,
+  206, 237,  207, 238,  208, 239,  209, 240,  210, 241,  211, 242,  212, 243,
+  213, 244,  214, 245,  215, 246,  216, 247,  217, 248,  218, 249,  219, 250,
+  220, 251,  221, 252,  222, 253,  223, 254,  224, 224,  225, 256,  226, 257,
+  227, 258,  228, 259,  229, 260,  230, 261,  231, 262,  232, 263,  233, 264,
+  234, 265,  235, 266,  236, 267,  237, 268,  238, 269,  239, 270,  240, 271,
+  241, 272,  242, 273,  243, 274,  244, 275,  245, 276,  246, 277,  247, 278,
+  248, 279,  249, 280,  250, 281,  251, 282,  252, 283,  253, 284,  254, 285,
+  255, 286,  256, 256,  257, 288,  258, 289,  259, 290,  260, 291,  261, 292,
+  262, 293,  263, 294,  264, 295,  265, 296,  266, 297,  267, 298,  268, 299,
+  269, 300,  270, 301,  271, 302,  272, 303,  273, 304,  274, 305,  275, 306,
+  276, 307,  277, 308,  278, 309,  279, 310,  280, 311,  281, 312,  282, 313,
+  283, 314,  284, 315,  285, 316,  286, 317,  287, 318,  288, 288,  289, 320,
+  290, 321,  291, 322,  292, 323,  293, 324,  294, 325,  295, 326,  296, 327,
+  297, 328,  298, 329,  299, 330,  300, 331,  301, 332,  302, 333,  303, 334,
+  304, 335,  305, 336,  306, 337,  307, 338,  308, 339,  309, 340,  310, 341,
+  311, 342,  312, 343,  313, 344,  314, 345,  315, 346,  316, 347,  317, 348,
+  318, 349,  319, 350,  320, 320,  321, 352,  322, 353,  323, 354,  324, 355,
+  325, 356,  326, 357,  327, 358,  328, 359,  329, 360,  330, 361,  331, 362,
+  332, 363,  333, 364,  334, 365,  335, 366,  336, 367,  337, 368,  338, 369,
+  339, 370,  340, 371,  341, 372,  342, 373,  343, 374,  344, 375,  345, 376,
+  346, 377,  347, 378,  348, 379,  349, 380,  350, 381,  351, 382,  352, 352,
+  353, 384,  354, 385,  355, 386,  356, 387,  357, 388,  358, 389,  359, 390,
+  360, 391,  361, 392,  362, 393,  363, 394,  364, 395,  365, 396,  366, 397,
+  367, 398,  368, 399,  369, 400,  370, 401,  371, 402,  372, 403,  373, 404,
+  374, 405,  375, 406,  376, 407,  377, 408,  378, 409,  379, 410,  380, 411,
+  381, 412,  382, 413,  383, 414,  384, 384,  385, 416,  386, 417,  387, 418,
+  388, 419,  389, 420,  390, 421,  391, 422,  392, 423,  393, 424,  394, 425,
+  395, 426,  396, 427,  397, 428,  398, 429,  399, 430,  400, 431,  401, 432,
+  402, 433,  403, 434,  404, 435,  405, 436,  406, 437,  407, 438,  408, 439,
+  409, 440,  410, 441,  411, 442,  412, 443,  413, 444,  414, 445,  415, 446,
+  416, 416,  417, 448,  418, 449,  419, 450,  420, 451,  421, 452,  422, 453,
+  423, 454,  424, 455,  425, 456,  426, 457,  427, 458,  428, 459,  429, 460,
+  430, 461,  431, 462,  432, 463,  433, 464,  434, 465,  435, 466,  436, 467,
+  437, 468,  438, 469,  439, 470,  440, 471,  441, 472,  442, 473,  443, 474,
+  444, 475,  445, 476,  446, 477,  447, 478,  448, 448,  449, 480,  450, 481,
+  451, 482,  452, 483,  453, 484,  454, 485,  455, 486,  456, 487,  457, 488,
+  458, 489,  459, 490,  460, 491,  461, 492,  462, 493,  463, 494,  464, 495,
+  465, 496,  466, 497,  467, 498,  468, 499,  469, 500,  470, 501,  471, 502,
+  472, 503,  473, 504,  474, 505,  475, 506,  476, 507,  477, 508,  478, 509,
+  479, 510,  480, 480,  481, 512,  482, 513,  483, 514,  484, 515,  485, 516,
+  486, 517,  487, 518,  488, 519,  489, 520,  490, 521,  491, 522,  492, 523,
+  493, 524,  494, 525,  495, 526,  496, 527,  497, 528,  498, 529,  499, 530,
+  500, 531,  501, 532,  502, 533,  503, 534,  504, 535,  505, 536,  506, 537,
+  507, 538,  508, 539,  509, 540,  510, 541,  511, 542,  512, 512,  513, 544,
+  514, 545,  515, 546,  516, 547,  517, 548,  518, 549,  519, 550,  520, 551,
+  521, 552,  522, 553,  523, 554,  524, 555,  525, 556,  526, 557,  527, 558,
+  528, 559,  529, 560,  530, 561,  531, 562,  532, 563,  533, 564,  534, 565,
+  535, 566,  536, 567,  537, 568,  538, 569,  539, 570,  540, 571,  541, 572,
+  542, 573,  543, 574,  544, 544,  545, 576,  546, 577,  547, 578,  548, 579,
+  549, 580,  550, 581,  551, 582,  552, 583,  553, 584,  554, 585,  555, 586,
+  556, 587,  557, 588,  558, 589,  559, 590,  560, 591,  561, 592,  562, 593,
+  563, 594,  564, 595,  565, 596,  566, 597,  567, 598,  568, 599,  569, 600,
+  570, 601,  571, 602,  572, 603,  573, 604,  574, 605,  575, 606,  576, 576,
+  577, 608,  578, 609,  579, 610,  580, 611,  581, 612,  582, 613,  583, 614,
+  584, 615,  585, 616,  586, 617,  587, 618,  588, 619,  589, 620,  590, 621,
+  591, 622,  592, 623,  593, 624,  594, 625,  595, 626,  596, 627,  597, 628,
+  598, 629,  599, 630,  600, 631,  601, 632,  602, 633,  603, 634,  604, 635,
+  605, 636,  606, 637,  607, 638,  608, 608,  609, 640,  610, 641,  611, 642,
+  612, 643,  613, 644,  614, 645,  615, 646,  616, 647,  617, 648,  618, 649,
+  619, 650,  620, 651,  621, 652,  622, 653,  623, 654,  624, 655,  625, 656,
+  626, 657,  627, 658,  628, 659,  629, 660,  630, 661,  631, 662,  632, 663,
+  633, 664,  634, 665,  635, 666,  636, 667,  637, 668,  638, 669,  639, 670,
+  640, 640,  641, 672,  642, 673,  643, 674,  644, 675,  645, 676,  646, 677,
+  647, 678,  648, 679,  649, 680,  650, 681,  651, 682,  652, 683,  653, 684,
+  654, 685,  655, 686,  656, 687,  657, 688,  658, 689,  659, 690,  660, 691,
+  661, 692,  662, 693,  663, 694,  664, 695,  665, 696,  666, 697,  667, 698,
+  668, 699,  669, 700,  670, 701,  671, 702,  672, 672,  673, 704,  674, 705,
+  675, 706,  676, 707,  677, 708,  678, 709,  679, 710,  680, 711,  681, 712,
+  682, 713,  683, 714,  684, 715,  685, 716,  686, 717,  687, 718,  688, 719,
+  689, 720,  690, 721,  691, 722,  692, 723,  693, 724,  694, 725,  695, 726,
+  696, 727,  697, 728,  698, 729,  699, 730,  700, 731,  701, 732,  702, 733,
+  703, 734,  704, 704,  705, 736,  706, 737,  707, 738,  708, 739,  709, 740,
+  710, 741,  711, 742,  712, 743,  713, 744,  714, 745,  715, 746,  716, 747,
+  717, 748,  718, 749,  719, 750,  720, 751,  721, 752,  722, 753,  723, 754,
+  724, 755,  725, 756,  726, 757,  727, 758,  728, 759,  729, 760,  730, 761,
+  731, 762,  732, 763,  733, 764,  734, 765,  735, 766,  736, 736,  737, 768,
+  738, 769,  739, 770,  740, 771,  741, 772,  742, 773,  743, 774,  744, 775,
+  745, 776,  746, 777,  747, 778,  748, 779,  749, 780,  750, 781,  751, 782,
+  752, 783,  753, 784,  754, 785,  755, 786,  756, 787,  757, 788,  758, 789,
+  759, 790,  760, 791,  761, 792,  762, 793,  763, 794,  764, 795,  765, 796,
+  766, 797,  767, 798,  768, 768,  769, 800,  770, 801,  771, 802,  772, 803,
+  773, 804,  774, 805,  775, 806,  776, 807,  777, 808,  778, 809,  779, 810,
+  780, 811,  781, 812,  782, 813,  783, 814,  784, 815,  785, 816,  786, 817,
+  787, 818,  788, 819,  789, 820,  790, 821,  791, 822,  792, 823,  793, 824,
+  794, 825,  795, 826,  796, 827,  797, 828,  798, 829,  799, 830,  800, 800,
+  801, 832,  802, 833,  803, 834,  804, 835,  805, 836,  806, 837,  807, 838,
+  808, 839,  809, 840,  810, 841,  811, 842,  812, 843,  813, 844,  814, 845,
+  815, 846,  816, 847,  817, 848,  818, 849,  819, 850,  820, 851,  821, 852,
+  822, 853,  823, 854,  824, 855,  825, 856,  826, 857,  827, 858,  828, 859,
+  829, 860,  830, 861,  831, 862,  832, 832,  833, 864,  834, 865,  835, 866,
+  836, 867,  837, 868,  838, 869,  839, 870,  840, 871,  841, 872,  842, 873,
+  843, 874,  844, 875,  845, 876,  846, 877,  847, 878,  848, 879,  849, 880,
+  850, 881,  851, 882,  852, 883,  853, 884,  854, 885,  855, 886,  856, 887,
+  857, 888,  858, 889,  859, 890,  860, 891,  861, 892,  862, 893,  863, 894,
+  864, 864,  865, 896,  866, 897,  867, 898,  868, 899,  869, 900,  870, 901,
+  871, 902,  872, 903,  873, 904,  874, 905,  875, 906,  876, 907,  877, 908,
+  878, 909,  879, 910,  880, 911,  881, 912,  882, 913,  883, 914,  884, 915,
+  885, 916,  886, 917,  887, 918,  888, 919,  889, 920,  890, 921,  891, 922,
+  892, 923,  893, 924,  894, 925,  895, 926,  896, 896,  897, 928,  898, 929,
+  899, 930,  900, 931,  901, 932,  902, 933,  903, 934,  904, 935,  905, 936,
+  906, 937,  907, 938,  908, 939,  909, 940,  910, 941,  911, 942,  912, 943,
+  913, 944,  914, 945,  915, 946,  916, 947,  917, 948,  918, 949,  919, 950,
+  920, 951,  921, 952,  922, 953,  923, 954,  924, 955,  925, 956,  926, 957,
+  927, 958,  928, 928,  929, 960,  930, 961,  931, 962,  932, 963,  933, 964,
+  934, 965,  935, 966,  936, 967,  937, 968,  938, 969,  939, 970,  940, 971,
+  941, 972,  942, 973,  943, 974,  944, 975,  945, 976,  946, 977,  947, 978,
+  948, 979,  949, 980,  950, 981,  951, 982,  952, 983,  953, 984,  954, 985,
+  955, 986,  956, 987,  957, 988,  958, 989,  959, 990,  960, 960,  961, 992,
+  962, 993,  963, 994,  964, 995,  965, 996,  966, 997,  967, 998,  968, 999,
+  969, 1000, 970, 1001, 971, 1002, 972, 1003, 973, 1004, 974, 1005, 975, 1006,
+  976, 1007, 977, 1008, 978, 1009, 979, 1010, 980, 1011, 981, 1012, 982, 1013,
+  983, 1014, 984, 1015, 985, 1016, 986, 1017, 987, 1018, 988, 1019, 989, 1020,
+  990, 1021, 991, 1022, 0,   0,
+};
+#endif  // CONFIG_EXT_TX
+
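+/* Each *_neighbors table stores, for every scan position i of a 32x32 block
+ * (1024 positions plus one trailing sentinel pair, hence 1025 entries), the
+ * two previously coded coefficient positions at [MAX_NEIGHBORS * i] and
+ * [MAX_NEIGHBORS * i + 1].  Their token values form the entropy-coding
+ * context for position i, as in the codec's get_coef_context() helper,
+ * sketched here for illustration:
+ *
+ *   static INLINE int get_coef_context(const int16_t *neighbors,
+ *                                      const uint8_t *token_cache, int c) {
+ *     return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
+ *             token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
+ *   }
+ */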
+DECLARE_ALIGNED(16, static const int16_t,
+                default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+  0,   0,    0,   0,    32,  0,    32,  32,   1,   32,  33,  1,    64,  64,
+  33,  64,   2,   33,   96,  96,   34,  2,    65,  96,  34,  65,   128, 128,
+  97,  128,  3,   34,   66,  97,   35,  3,    35,  66,  98,  129,  129, 160,
+  160, 161,  4,   35,   67,  98,   192, 192,  36,  4,   130, 161,  161, 192,
+  36,  67,   99,  130,  5,   36,   68,  99,   193, 224, 162, 193,  224, 225,
+  131, 162,  37,  68,   100, 131,  37,  5,    194, 225, 225, 256,  256, 257,
+  163, 194,  69,  100,  132, 163,  6,   37,   226, 257, 38,  6,    195, 226,
+  257, 288,  101, 132,  288, 289,  38,  69,   164, 195, 133, 164,  258, 289,
+  227, 258,  196, 227,  7,   38,   289, 320,  70,  101, 320, 321,  39,  7,
+  165, 196,  39,  70,   102, 133,  290, 321,  259, 290, 228, 259,  321, 352,
+  352, 353,  197, 228,  134, 165,  71,  102,  8,   39,  322, 353,  291, 322,
+  260, 291,  103, 134,  353, 384,  166, 197,  229, 260, 40,  71,   40,  8,
+  384, 385,  135, 166,  354, 385,  323, 354,  198, 229, 292, 323,  72,  103,
+  261, 292,  9,   40,   385, 416,  167, 198,  104, 135, 230, 261,  355, 386,
+  416, 417,  293, 324,  324, 355,  41,  9,    41,  72,  386, 417,  199, 230,
+  136, 167,  417, 448,  262, 293,  356, 387,  73,  104, 387, 418,  231, 262,
+  10,  41,   168, 199,  325, 356,  418, 449,  105, 136, 448, 449,  42,  73,
+  294, 325,  200, 231,  42,  10,   357, 388,  137, 168, 263, 294,  388, 419,
+  74,  105,  419, 450,  449, 480,  326, 357,  232, 263, 295, 326,  169, 200,
+  11,  42,   106, 137,  480, 481,  450, 481,  358, 389, 264, 295,  201, 232,
+  138, 169,  389, 420,  43,  74,   420, 451,  327, 358, 43,  11,   481, 512,
+  233, 264,  451, 482,  296, 327,  75,  106,  170, 201, 482, 513,  512, 513,
+  390, 421,  359, 390,  421, 452,  107, 138,  12,  43,  202, 233,  452, 483,
+  265, 296,  328, 359,  139, 170,  44,  75,   483, 514, 513, 544,  234, 265,
+  297, 328,  422, 453,  44,  12,   391, 422,  171, 202, 76,  107,  514, 545,
+  453, 484,  544, 545,  266, 297,  203, 234,  108, 139, 329, 360,  298, 329,
+  140, 171,  515, 546,  13,  44,   423, 454,  235, 266, 545, 576,  454, 485,
+  45,  76,   172, 203,  330, 361,  576, 577,  45,  13,  267, 298,  546, 577,
+  77,  108,  204, 235,  455, 486,  577, 608,  299, 330, 109, 140,  547, 578,
+  14,  45,   46,  14,   141, 172,  578, 609,  331, 362, 46,  77,   173, 204,
+  15,  15,   78,  109,  205, 236,  579, 610,  110, 141, 15,  46,   142, 173,
+  47,  78,   174, 205,  16,  16,   79,  110,  206, 237, 16,  47,   111, 142,
+  48,  79,   143, 174,  80,  111,  175, 206,  17,  48,  49,  17,   207, 238,
+  49,  80,   81,  112,  18,  18,   18,  49,   50,  81,  82,  113,  19,  50,
+  51,  82,   83,  114,  608, 609,  484, 515,  360, 391, 236, 267,  112, 143,
+  51,  19,   640, 640,  609, 640,  516, 547,  485, 516, 392, 423,  361, 392,
+  268, 299,  237, 268,  144, 175,  113, 144,  20,  51,  52,  20,   672, 672,
+  641, 672,  610, 641,  548, 579,  517, 548,  486, 517, 424, 455,  393, 424,
+  362, 393,  300, 331,  269, 300,  238, 269,  176, 207, 145, 176,  114, 145,
+  52,  83,   21,  52,   53,  21,   704, 704,  673, 704, 642, 673,  611, 642,
+  580, 611,  549, 580,  518, 549,  487, 518,  456, 487, 425, 456,  394, 425,
+  363, 394,  332, 363,  301, 332,  270, 301,  239, 270, 208, 239,  177, 208,
+  146, 177,  115, 146,  84,  115,  53,  84,   22,  53,  54,  22,   705, 736,
+  674, 705,  643, 674,  581, 612,  550, 581,  519, 550, 457, 488,  426, 457,
+  395, 426,  333, 364,  302, 333,  271, 302,  209, 240, 178, 209,  147, 178,
+  85,  116,  54,  85,   23,  54,   706, 737,  675, 706, 582, 613,  551, 582,
+  458, 489,  427, 458,  334, 365,  303, 334,  210, 241, 179, 210,  86,  117,
+  55,  86,   707, 738,  583, 614,  459, 490,  335, 366, 211, 242,  87,  118,
+  736, 737,  612, 643,  488, 519,  364, 395,  240, 271, 116, 147,  55,  23,
+  768, 768,  737, 768,  644, 675,  613, 644,  520, 551, 489, 520,  396, 427,
+  365, 396,  272, 303,  241, 272,  148, 179,  117, 148, 24,  55,   56,  24,
+  800, 800,  769, 800,  738, 769,  676, 707,  645, 676, 614, 645,  552, 583,
+  521, 552,  490, 521,  428, 459,  397, 428,  366, 397, 304, 335,  273, 304,
+  242, 273,  180, 211,  149, 180,  118, 149,  56,  87,  25,  56,   57,  25,
+  832, 832,  801, 832,  770, 801,  739, 770,  708, 739, 677, 708,  646, 677,
+  615, 646,  584, 615,  553, 584,  522, 553,  491, 522, 460, 491,  429, 460,
+  398, 429,  367, 398,  336, 367,  305, 336,  274, 305, 243, 274,  212, 243,
+  181, 212,  150, 181,  119, 150,  88,  119,  57,  88,  26,  57,   58,  26,
+  833, 864,  802, 833,  771, 802,  709, 740,  678, 709, 647, 678,  585, 616,
+  554, 585,  523, 554,  461, 492,  430, 461,  399, 430, 337, 368,  306, 337,
+  275, 306,  213, 244,  182, 213,  151, 182,  89,  120, 58,  89,   27,  58,
+  834, 865,  803, 834,  710, 741,  679, 710,  586, 617, 555, 586,  462, 493,
+  431, 462,  338, 369,  307, 338,  214, 245,  183, 214, 90,  121,  59,  90,
+  835, 866,  711, 742,  587, 618,  463, 494,  339, 370, 215, 246,  91,  122,
+  864, 865,  740, 771,  616, 647,  492, 523,  368, 399, 244, 275,  120, 151,
+  59,  27,   896, 896,  865, 896,  772, 803,  741, 772, 648, 679,  617, 648,
+  524, 555,  493, 524,  400, 431,  369, 400,  276, 307, 245, 276,  152, 183,
+  121, 152,  28,  59,   60,  28,   928, 928,  897, 928, 866, 897,  804, 835,
+  773, 804,  742, 773,  680, 711,  649, 680,  618, 649, 556, 587,  525, 556,
+  494, 525,  432, 463,  401, 432,  370, 401,  308, 339, 277, 308,  246, 277,
+  184, 215,  153, 184,  122, 153,  60,  91,   29,  60,  61,  29,   960, 960,
+  929, 960,  898, 929,  867, 898,  836, 867,  805, 836, 774, 805,  743, 774,
+  712, 743,  681, 712,  650, 681,  619, 650,  588, 619, 557, 588,  526, 557,
+  495, 526,  464, 495,  433, 464,  402, 433,  371, 402, 340, 371,  309, 340,
+  278, 309,  247, 278,  216, 247,  185, 216,  154, 185, 123, 154,  92,  123,
+  61,  92,   30,  61,   62,  30,   961, 992,  930, 961, 899, 930,  837, 868,
+  806, 837,  775, 806,  713, 744,  682, 713,  651, 682, 589, 620,  558, 589,
+  527, 558,  465, 496,  434, 465,  403, 434,  341, 372, 310, 341,  279, 310,
+  217, 248,  186, 217,  155, 186,  93,  124,  62,  93,  31,  62,   962, 993,
+  931, 962,  838, 869,  807, 838,  714, 745,  683, 714, 590, 621,  559, 590,
+  466, 497,  435, 466,  342, 373,  311, 342,  218, 249, 187, 218,  94,  125,
+  63,  94,   963, 994,  839, 870,  715, 746,  591, 622, 467, 498,  343, 374,
+  219, 250,  95,  126,  868, 899,  744, 775,  620, 651, 496, 527,  372, 403,
+  248, 279,  124, 155,  900, 931,  869, 900,  776, 807, 745, 776,  652, 683,
+  621, 652,  528, 559,  497, 528,  404, 435,  373, 404, 280, 311,  249, 280,
+  156, 187,  125, 156,  932, 963,  901, 932,  870, 901, 808, 839,  777, 808,
+  746, 777,  684, 715,  653, 684,  622, 653,  560, 591, 529, 560,  498, 529,
+  436, 467,  405, 436,  374, 405,  312, 343,  281, 312, 250, 281,  188, 219,
+  157, 188,  126, 157,  964, 995,  933, 964,  902, 933, 871, 902,  840, 871,
+  809, 840,  778, 809,  747, 778,  716, 747,  685, 716, 654, 685,  623, 654,
+  592, 623,  561, 592,  530, 561,  499, 530,  468, 499, 437, 468,  406, 437,
+  375, 406,  344, 375,  313, 344,  282, 313,  251, 282, 220, 251,  189, 220,
+  158, 189,  127, 158,  965, 996,  934, 965,  903, 934, 841, 872,  810, 841,
+  779, 810,  717, 748,  686, 717,  655, 686,  593, 624, 562, 593,  531, 562,
+  469, 500,  438, 469,  407, 438,  345, 376,  314, 345, 283, 314,  221, 252,
+  190, 221,  159, 190,  966, 997,  935, 966,  842, 873, 811, 842,  718, 749,
+  687, 718,  594, 625,  563, 594,  470, 501,  439, 470, 346, 377,  315, 346,
+  222, 253,  191, 222,  967, 998,  843, 874,  719, 750, 595, 626,  471, 502,
+  347, 378,  223, 254,  872, 903,  748, 779,  624, 655, 500, 531,  376, 407,
+  252, 283,  904, 935,  873, 904,  780, 811,  749, 780, 656, 687,  625, 656,
+  532, 563,  501, 532,  408, 439,  377, 408,  284, 315, 253, 284,  936, 967,
+  905, 936,  874, 905,  812, 843,  781, 812,  750, 781, 688, 719,  657, 688,
+  626, 657,  564, 595,  533, 564,  502, 533,  440, 471, 409, 440,  378, 409,
+  316, 347,  285, 316,  254, 285,  968, 999,  937, 968, 906, 937,  875, 906,
+  844, 875,  813, 844,  782, 813,  751, 782,  720, 751, 689, 720,  658, 689,
+  627, 658,  596, 627,  565, 596,  534, 565,  503, 534, 472, 503,  441, 472,
+  410, 441,  379, 410,  348, 379,  317, 348,  286, 317, 255, 286,  969, 1000,
+  938, 969,  907, 938,  845, 876,  814, 845,  783, 814, 721, 752,  690, 721,
+  659, 690,  597, 628,  566, 597,  535, 566,  473, 504, 442, 473,  411, 442,
+  349, 380,  318, 349,  287, 318,  970, 1001, 939, 970, 846, 877,  815, 846,
+  722, 753,  691, 722,  598, 629,  567, 598,  474, 505, 443, 474,  350, 381,
+  319, 350,  971, 1002, 847, 878,  723, 754,  599, 630, 475, 506,  351, 382,
+  876, 907,  752, 783,  628, 659,  504, 535,  380, 411, 908, 939,  877, 908,
+  784, 815,  753, 784,  660, 691,  629, 660,  536, 567, 505, 536,  412, 443,
+  381, 412,  940, 971,  909, 940,  878, 909,  816, 847, 785, 816,  754, 785,
+  692, 723,  661, 692,  630, 661,  568, 599,  537, 568, 506, 537,  444, 475,
+  413, 444,  382, 413,  972, 1003, 941, 972,  910, 941, 879, 910,  848, 879,
+  817, 848,  786, 817,  755, 786,  724, 755,  693, 724, 662, 693,  631, 662,
+  600, 631,  569, 600,  538, 569,  507, 538,  476, 507, 445, 476,  414, 445,
+  383, 414,  973, 1004, 942, 973,  911, 942,  849, 880, 818, 849,  787, 818,
+  725, 756,  694, 725,  663, 694,  601, 632,  570, 601, 539, 570,  477, 508,
+  446, 477,  415, 446,  974, 1005, 943, 974,  850, 881, 819, 850,  726, 757,
+  695, 726,  602, 633,  571, 602,  478, 509,  447, 478, 975, 1006, 851, 882,
+  727, 758,  603, 634,  479, 510,  880, 911,  756, 787, 632, 663,  508, 539,
+  912, 943,  881, 912,  788, 819,  757, 788,  664, 695, 633, 664,  540, 571,
+  509, 540,  944, 975,  913, 944,  882, 913,  820, 851, 789, 820,  758, 789,
+  696, 727,  665, 696,  634, 665,  572, 603,  541, 572, 510, 541,  976, 1007,
+  945, 976,  914, 945,  883, 914,  852, 883,  821, 852, 790, 821,  759, 790,
+  728, 759,  697, 728,  666, 697,  635, 666,  604, 635, 573, 604,  542, 573,
+  511, 542,  977, 1008, 946, 977,  915, 946,  853, 884, 822, 853,  791, 822,
+  729, 760,  698, 729,  667, 698,  605, 636,  574, 605, 543, 574,  978, 1009,
+  947, 978,  854, 885,  823, 854,  730, 761,  699, 730, 606, 637,  575, 606,
+  979, 1010, 855, 886,  731, 762,  607, 638,  884, 915, 760, 791,  636, 667,
+  916, 947,  885, 916,  792, 823,  761, 792,  668, 699, 637, 668,  948, 979,
+  917, 948,  886, 917,  824, 855,  793, 824,  762, 793, 700, 731,  669, 700,
+  638, 669,  980, 1011, 949, 980,  918, 949,  887, 918, 856, 887,  825, 856,
+  794, 825,  763, 794,  732, 763,  701, 732,  670, 701, 639, 670,  981, 1012,
+  950, 981,  919, 950,  857, 888,  826, 857,  795, 826, 733, 764,  702, 733,
+  671, 702,  982, 1013, 951, 982,  858, 889,  827, 858, 734, 765,  703, 734,
+  983, 1014, 859, 890,  735, 766,  888, 919,  764, 795, 920, 951,  889, 920,
+  796, 827,  765, 796,  952, 983,  921, 952,  890, 921, 828, 859,  797, 828,
+  766, 797,  984, 1015, 953, 984,  922, 953,  891, 922, 860, 891,  829, 860,
+  798, 829,  767, 798,  985, 1016, 954, 985,  923, 954, 861, 892,  830, 861,
+  799, 830,  986, 1017, 955, 986,  862, 893,  831, 862, 987, 1018, 863, 894,
+  892, 923,  924, 955,  893, 924,  956, 987,  925, 956, 894, 925,  988, 1019,
+  957, 988,  926, 957,  895, 926,  989, 1020, 958, 989, 927, 958,  990, 1021,
+  959, 990,  991, 1022, 0,   0,
+};
+
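+/* The CONFIG_EXT_TX tables below are the neighbor counterparts of the v2,
+ * h2 and qtr 32x32 scan orders; they use the same (1024 + 1 sentinel) pair
+ * layout as the default table above. */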
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+                v2_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+  0,   0,    0,   0,    0,   0,    1,   32,   1,   1,   32,  32,   2,   33,
+  33,  64,   34,  65,   2,   2,    64,  64,   3,   34,  65,  96,   35,  66,
+  66,  97,   3,   3,    96,  96,   4,   35,   97,  128, 67,  98,   36,  67,
+  98,  129,  4,   4,    68,  99,   99,  130,  128, 128, 5,   36,   129, 160,
+  37,  68,   130, 161,  100, 131,  69,  100,  131, 162, 5,   5,    160, 160,
+  6,   37,   161, 192,  38,  69,   162, 193,  101, 132, 132, 163,  70,  101,
+  163, 194,  6,   6,    192, 192,  7,   38,   133, 164, 193, 224,  102, 133,
+  164, 195,  39,  70,   194, 225,  71,  102,  195, 226, 134, 165,  165, 196,
+  7,   7,    224, 224,  8,   39,   103, 134,  196, 227, 225, 256,  40,  71,
+  226, 257,  166, 197,  72,  103,  227, 258,  135, 166, 197, 228,  104, 135,
+  228, 259,  8,   8,    256, 256,  9,   40,   257, 288, 41,  72,   167, 198,
+  198, 229,  258, 289,  136, 167,  229, 260,  73,  104, 259, 290,  105, 136,
+  260, 291,  199, 230,  9,   9,    168, 199,  230, 261, 288, 288,  10,  41,
+  289, 320,  42,  73,   290, 321,  137, 168,  261, 292, 74,  105,  291, 322,
+  200, 231,  231, 262,  106, 137,  292, 323,  169, 200, 262, 293,  10,  10,
+  320, 320,  11,  42,   321, 352,  43,  74,   138, 169, 293, 324,  322, 353,
+  232, 263,  75,  106,  201, 232,  263, 294,  323, 354, 170, 201,  294, 325,
+  107, 138,  324, 355,  11,  11,   352, 352,  12,  43,  233, 264,  264, 295,
+  353, 384,  139, 170,  325, 356,  44,  75,   354, 385, 202, 233,  295, 326,
+  76,  107,  355, 386,  171, 202,  326, 357,  108, 139, 356, 387,  265, 296,
+  234, 265,  296, 327,  12,  12,   140, 171,  357, 388, 384, 384,  13,  44,
+  203, 234,  327, 358,  385, 416,  45,  76,   386, 417, 77,  108,  387, 418,
+  172, 203,  358, 389,  266, 297,  297, 328,  109, 140, 235, 266,  328, 359,
+  388, 419,  204, 235,  359, 390,  141, 172,  389, 420, 13,  13,   416, 416,
+  14,  45,   417, 448,  46,  77,   298, 329,  418, 449, 267, 298,  329, 360,
+  78,  109,  173, 204,  390, 421,  419, 450,  236, 267, 360, 391,  110, 141,
+  420, 451,  205, 236,  391, 422,  142, 173,  299, 330, 330, 361,  421, 452,
+  14,  14,   268, 299,  361, 392,  448, 448,  15,  46,  449, 480,  47,  78,
+  450, 481,  174, 205,  422, 453,  237, 268,  392, 423, 79,  110,  451, 482,
+  111, 142,  452, 483,  331, 362,  300, 331,  362, 393, 206, 237,  423, 454,
+  143, 174,  269, 300,  393, 424,  453, 484,  480, 480, 481, 512,  238, 269,
+  424, 455,  482, 513,  175, 206,  454, 485,  332, 363, 363, 394,  483, 514,
+  301, 332,  394, 425,  484, 515,  207, 238,  455, 486, 270, 301,  425, 456,
+  485, 516,  364, 395,  239, 270,  456, 487,  512, 512, 333, 364,  395, 426,
+  513, 544,  486, 517,  514, 545,  302, 333,  426, 457, 515, 546,  487, 518,
+  516, 547,  271, 302,  457, 488,  365, 396,  396, 427, 517, 548,  334, 365,
+  427, 458,  488, 519,  544, 544,  303, 334,  458, 489, 518, 549,  545, 576,
+  546, 577,  547, 578,  489, 520,  397, 428,  519, 550, 366, 397,  428, 459,
+  548, 579,  335, 366,  459, 490,  549, 580,  520, 551, 490, 521,  550, 581,
+  576, 576,  577, 608,  398, 429,  429, 460,  578, 609, 367, 398,  460, 491,
+  521, 552,  579, 610,  551, 582,  491, 522,  580, 611, 581, 612,  552, 583,
+  522, 553,  430, 461,  399, 430,  461, 492,  582, 613, 492, 523,  608, 608,
+  609, 640,  610, 641,  553, 584,  611, 642,  523, 554, 583, 614,  612, 643,
+  431, 462,  462, 493,  554, 585,  493, 524,  584, 615, 613, 644,  524, 555,
+  614, 645,  640, 640,  585, 616,  641, 672,  555, 586, 642, 673,  615, 646,
+  463, 494,  643, 674,  494, 525,  644, 675,  525, 556, 586, 617,  616, 647,
+  645, 676,  556, 587,  646, 677,  495, 526,  617, 648, 587, 618,  672, 672,
+  526, 557,  673, 704,  674, 705,  647, 678,  557, 588, 675, 706,  618, 649,
+  676, 707,  588, 619,  648, 679,  677, 708,  527, 558, 558, 589,  678, 709,
+  619, 650,  649, 680,  704, 704,  589, 620,  705, 736, 679, 710,  706, 737,
+  707, 738,  650, 681,  620, 651,  708, 739,  680, 711, 559, 590,  709, 740,
+  590, 621,  651, 682,  681, 712,  710, 741,  621, 652, 736, 736,  737, 768,
+  711, 742,  738, 769,  682, 713,  652, 683,  739, 770, 591, 622,  740, 771,
+  712, 743,  622, 653,  741, 772,  683, 714,  653, 684, 713, 744,  742, 773,
+  623, 654,  743, 774,  768, 768,  769, 800,  684, 715, 714, 745,  770, 801,
+  771, 802,  654, 685,  744, 775,  772, 803,  715, 746, 773, 804,  685, 716,
+  745, 776,  774, 805,  655, 686,  716, 747,  775, 806, 746, 777,  800, 800,
+  801, 832,  686, 717,  802, 833,  803, 834,  776, 807, 804, 835,  747, 778,
+  717, 748,  805, 836,  777, 808,  687, 718,  806, 837, 748, 779,  718, 749,
+  778, 809,  807, 838,  832, 832,  833, 864,  834, 865, 835, 866,  808, 839,
+  749, 780,  836, 867,  779, 810,  719, 750,  837, 868, 809, 840,  838, 869,
+  780, 811,  750, 781,  810, 841,  839, 870,  864, 864, 865, 896,  866, 897,
+  840, 871,  867, 898,  781, 812,  811, 842,  868, 899, 751, 782,  869, 900,
+  841, 872,  812, 843,  870, 901,  782, 813,  842, 873, 871, 902,  896, 896,
+  897, 928,  813, 844,  898, 929,  872, 903,  783, 814, 843, 874,  899, 930,
+  900, 931,  873, 904,  901, 932,  814, 845,  844, 875, 902, 933,  874, 905,
+  903, 934,  845, 876,  928, 928,  815, 846,  929, 960, 930, 961,  875, 906,
+  904, 935,  931, 962,  932, 963,  905, 936,  846, 877, 933, 964,  876, 907,
+  934, 965,  906, 937,  935, 966,  877, 908,  847, 878, 960, 960,  907, 938,
+  961, 992,  936, 967,  962, 993,  963, 994,  964, 995, 878, 909,  937, 968,
+  908, 939,  965, 996,  966, 997,  938, 969,  879, 910, 909, 940,  967, 998,
+  939, 970,  968, 999,  910, 941,  969, 1000, 940, 971, 970, 1001, 911, 942,
+  941, 972,  971, 1002, 942, 973,  972, 1003, 943, 974, 973, 1004, 974, 1005,
+  975, 1006, 15,  15,   16,  47,   48,  79,   80,  111, 112, 143,  144, 175,
+  16,  16,   17,  48,   176, 207,  49,  80,   81,  112, 113, 144,  208, 239,
+  145, 176,  240, 271,  17,  17,   18,  49,   177, 208, 50,  81,   82,  113,
+  272, 303,  209, 240,  114, 145,  146, 177,  241, 272, 304, 335,  178, 209,
+  18,  18,   19,  50,   51,  82,   83,  114,  273, 304, 210, 241,  115, 146,
+  336, 367,  147, 178,  242, 273,  305, 336,  179, 210, 19,  19,   368, 399,
+  20,  51,   52,  83,   274, 305,  84,  115,  211, 242, 337, 368,  116, 147,
+  306, 337,  148, 179,  243, 274,  400, 431,  369, 400, 180, 211,  20,  20,
+  21,  52,   275, 306,  53,  84,   338, 369,  212, 243, 85,  116,  432, 463,
+  117, 148,  401, 432,  307, 338,  244, 275,  149, 180, 370, 401,  181, 212,
+  276, 307,  464, 495,  339, 370,  21,  21,   22,  53,  433, 464,  54,  85,
+  213, 244,  86,  117,  402, 433,  118, 149,  308, 339, 245, 276,  371, 402,
+  150, 181,  496, 527,  465, 496,  182, 213,  434, 465, 340, 371,  277, 308,
+  22,  22,   23,  54,   403, 434,  55,  86,   214, 245, 87,  118,  309, 340,
+  372, 403,  119, 150,  497, 528,  528, 559,  246, 277, 466, 497,  151, 182,
+  435, 466,  341, 372,  183, 214,  278, 309,  404, 435, 23,  23,   24,  55,
+  215, 246,  529, 560,  56,  87,   498, 529,  560, 591, 310, 341,  88,  119,
+  373, 404,  467, 498,  120, 151,  247, 278,  436, 467, 152, 183,  342, 373,
+  279, 310,  405, 436,  184, 215,  530, 561,  561, 592, 499, 530,  592, 623,
+  24,  24,   216, 247,  468, 499,  25,  56,   374, 405, 57,  88,   311, 342,
+  89,  120,  437, 468,  248, 279,  121, 152,  562, 593, 153, 184,  343, 374,
+  531, 562,  593, 624,  406, 437,  500, 531,  624, 655, 280, 311,  185, 216,
+  469, 500,  375, 406,  217, 248,  25,  25,   312, 343, 26,  57,   58,  89,
+  438, 469,  90,  121,  563, 594,  594, 625,  249, 280, 532, 563,  625, 656,
+  122, 153,  344, 375,  501, 532,  656, 687,  407, 438, 154, 185,  281, 312,
+  470, 501,  186, 217,  376, 407,  595, 626,  564, 595, 626, 657,  218, 249,
+  313, 344,  439, 470,  26,  26,   27,  58,   533, 564, 657, 688,  59,  90,
+  91,  122,  250, 281,  502, 533,  688, 719,  123, 154, 408, 439,  345, 376,
+  155, 186,  471, 502,  282, 313,  596, 627,  627, 658, 187, 218,  565, 596,
+  658, 689,  377, 408,  440, 471,  534, 565,  689, 720, 314, 345,  219, 250,
+  27,  27,   28,  59,   503, 534,  720, 751,  60,  91,  92,  123,  251, 282,
+  409, 440,  346, 377,  124, 155,  628, 659,  472, 503, 597, 628,  659, 690,
+  566, 597,  690, 721,  156, 187,  283, 314,  535, 566, 721, 752,  188, 219,
+  378, 409,  441, 472,  315, 346,  504, 535,  752, 783, 220, 251,  28,  28,
+  629, 660,  660, 691,  29,  60,   61,  92,   410, 441, 598, 629,  691, 722,
+  252, 283,  93,  124,  347, 378,  473, 504,  567, 598, 722, 753,  125, 156,
+  284, 315,  536, 567,  753, 784,  157, 188,  442, 473, 379, 410,  189, 220,
+  505, 536,  784, 815,  661, 692,  316, 347,  630, 661, 692, 723,  221, 252,
+  599, 630,  723, 754,  411, 442,  29,  29,   568, 599, 754, 785,  30,  61,
+  474, 505,  62,  93,   253, 284,  348, 379,  94,  125, 537, 568,  785, 816,
+  126, 157,  285, 316,  158, 189,  443, 474,  662, 693, 693, 724,  380, 411,
+  631, 662,  724, 755,  506, 537,  816, 847,  190, 221, 600, 631,  755, 786,
+  317, 348,  222, 253,  569, 600,  786, 817,  412, 443, 475, 506,  30,  30,
+  31,  62,   349, 380,  254, 285,  63,  94,   538, 569, 817, 848,  694, 725,
+  95,  126,  663, 694,  725, 756,  632, 663,  756, 787, 127, 158,  444, 475,
+  286, 317,  381, 412,  507, 538,  848, 879,  159, 190, 601, 632,  787, 818,
+  191, 222,  318, 349,  570, 601,  818, 849,  476, 507, 223, 254,  413, 444,
+  695, 726,  726, 757,  664, 695,  757, 788,  539, 570, 849, 880,  350, 381,
+  255, 286,  633, 664,  788, 819,  445, 476,  602, 633, 819, 850,  508, 539,
+  880, 911,  287, 318,  382, 413,  571, 602,  850, 881, 727, 758,  696, 727,
+  758, 789,  319, 350,  477, 508,  665, 696,  789, 820, 414, 445,  540, 571,
+  881, 912,  634, 665,  820, 851,  351, 382,  603, 634, 851, 882,  446, 477,
+  509, 540,  912, 943,  383, 414,  728, 759,  759, 790, 572, 603,  882, 913,
+  697, 728,  790, 821,  666, 697,  821, 852,  478, 509, 635, 666,  852, 883,
+  415, 446,  541, 572,  913, 944,  604, 635,  883, 914, 760, 791,  729, 760,
+  791, 822,  510, 541,  944, 975,  447, 478,  698, 729, 822, 853,  573, 604,
+  914, 945,  667, 698,  853, 884,  636, 667,  884, 915, 479, 510,  542, 573,
+  945, 976,  761, 792,  792, 823,  605, 636,  915, 946, 730, 761,  823, 854,
+  699, 730,  854, 885,  511, 542,  976, 1007, 574, 605, 946, 977,  668, 699,
+  885, 916,  637, 668,  916, 947,  543, 574,  793, 824, 977, 1008, 762, 793,
+  824, 855,  731, 762,  855, 886,  606, 637,  947, 978, 700, 731,  886, 917,
+  669, 700,  917, 948,  575, 606,  978, 1009, 638, 669, 948, 979,  794, 825,
+  825, 856,  763, 794,  856, 887,  732, 763,  887, 918, 607, 638,  979, 1010,
+  701, 732,  918, 949,  670, 701,  949, 980,  826, 857, 795, 826,  857, 888,
+  764, 795,  888, 919,  639, 670,  980, 1011, 733, 764, 919, 950,  702, 733,
+  950, 981,  671, 702,  981, 1012, 827, 858,  858, 889, 796, 827,  889, 920,
+  765, 796,  920, 951,  734, 765,  951, 982,  703, 734, 982, 1013, 859, 890,
+  828, 859,  890, 921,  797, 828,  921, 952,  766, 797, 952, 983,  735, 766,
+  983, 1014, 860, 891,  891, 922,  829, 860,  922, 953, 798, 829,  953, 984,
+  767, 798,  984, 1015, 892, 923,  861, 892,  923, 954, 830, 861,  954, 985,
+  799, 830,  985, 1016, 893, 924,  924, 955,  862, 893, 955, 986,  831, 862,
+  986, 1017, 925, 956,  894, 925,  956, 987,  863, 894, 987, 1018, 926, 957,
+  957, 988,  895, 926,  988, 1019, 958, 989,  927, 958, 989, 1020, 959, 990,
+  990, 1021, 991, 1022, 0,   0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                h2_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+  0,   0,    0,   0,    0,   0,    1,   32,   1,   1,    32,  32,   2,   33,
+  33,  64,   34,  65,   2,   2,    64,  64,   3,   34,   65,  96,   35,  66,
+  66,  97,   3,   3,    96,  96,   4,   35,   97,  128,  67,  98,   36,  67,
+  98,  129,  4,   4,    68,  99,   99,  130,  128, 128,  5,   36,   129, 160,
+  37,  68,   130, 161,  100, 131,  69,  100,  131, 162,  5,   5,    160, 160,
+  6,   37,   161, 192,  38,  69,   162, 193,  101, 132,  132, 163,  70,  101,
+  163, 194,  6,   6,    192, 192,  7,   38,   133, 164,  193, 224,  102, 133,
+  164, 195,  39,  70,   194, 225,  71,  102,  195, 226,  134, 165,  165, 196,
+  7,   7,    224, 224,  8,   39,   103, 134,  196, 227,  225, 256,  40,  71,
+  226, 257,  166, 197,  72,  103,  227, 258,  135, 166,  197, 228,  104, 135,
+  228, 259,  8,   8,    256, 256,  9,   40,   257, 288,  41,  72,   167, 198,
+  198, 229,  258, 289,  136, 167,  229, 260,  73,  104,  259, 290,  105, 136,
+  260, 291,  199, 230,  9,   9,    168, 199,  230, 261,  288, 288,  10,  41,
+  289, 320,  42,  73,   290, 321,  137, 168,  261, 292,  74,  105,  291, 322,
+  200, 231,  231, 262,  106, 137,  292, 323,  169, 200,  262, 293,  10,  10,
+  320, 320,  11,  42,   321, 352,  43,  74,   138, 169,  293, 324,  322, 353,
+  232, 263,  75,  106,  201, 232,  263, 294,  323, 354,  170, 201,  294, 325,
+  107, 138,  324, 355,  11,  11,   352, 352,  12,  43,   233, 264,  264, 295,
+  353, 384,  139, 170,  325, 356,  44,  75,   354, 385,  202, 233,  295, 326,
+  76,  107,  355, 386,  171, 202,  326, 357,  108, 139,  356, 387,  265, 296,
+  234, 265,  296, 327,  12,  12,   140, 171,  357, 388,  384, 384,  13,  44,
+  203, 234,  327, 358,  385, 416,  45,  76,   386, 417,  77,  108,  387, 418,
+  172, 203,  358, 389,  266, 297,  297, 328,  109, 140,  235, 266,  328, 359,
+  388, 419,  204, 235,  359, 390,  141, 172,  389, 420,  13,  13,   416, 416,
+  14,  45,   417, 448,  46,  77,   298, 329,  418, 449,  267, 298,  329, 360,
+  78,  109,  173, 204,  390, 421,  419, 450,  236, 267,  360, 391,  110, 141,
+  420, 451,  205, 236,  391, 422,  142, 173,  299, 330,  330, 361,  421, 452,
+  14,  14,   268, 299,  361, 392,  448, 448,  15,  46,   449, 480,  47,  78,
+  450, 481,  174, 205,  422, 453,  237, 268,  392, 423,  79,  110,  451, 482,
+  111, 142,  452, 483,  331, 362,  300, 331,  362, 393,  206, 237,  423, 454,
+  143, 174,  269, 300,  393, 424,  453, 484,  15,  15,   16,  47,   48,  79,
+  238, 269,  424, 455,  175, 206,  454, 485,  80,  111,  332, 363,  363, 394,
+  301, 332,  394, 425,  112, 143,  207, 238,  455, 486,  270, 301,  425, 456,
+  144, 175,  364, 395,  16,  16,   239, 270,  456, 487,  17,  48,   333, 364,
+  395, 426,  176, 207,  49,  80,   302, 333,  426, 457,  81,  112,  113, 144,
+  208, 239,  271, 302,  457, 488,  365, 396,  396, 427,  145, 176,  334, 365,
+  427, 458,  240, 271,  17,  17,   18,  49,   177, 208,  303, 334,  458, 489,
+  50,  81,   82,  113,  272, 303,  209, 240,  397, 428,  114, 145,  366, 397,
+  428, 459,  335, 366,  459, 490,  146, 177,  241, 272,  304, 335,  178, 209,
+  18,  18,   19,  50,   51,  82,   398, 429,  429, 460,  367, 398,  460, 491,
+  83,  114,  273, 304,  210, 241,  115, 146,  336, 367,  147, 178,  242, 273,
+  305, 336,  430, 461,  399, 430,  461, 492,  179, 210,  19,  19,   368, 399,
+  20,  51,   52,  83,   274, 305,  84,  115,  211, 242,  337, 368,  116, 147,
+  431, 462,  462, 493,  306, 337,  148, 179,  243, 274,  400, 431,  369, 400,
+  180, 211,  20,  20,   21,  52,   275, 306,  53,  84,   338, 369,  212, 243,
+  85,  116,  463, 494,  432, 463,  117, 148,  401, 432,  307, 338,  244, 275,
+  149, 180,  370, 401,  181, 212,  276, 307,  464, 495,  339, 370,  21,  21,
+  22,  53,   433, 464,  54,  85,   213, 244,  86,  117,  402, 433,  118, 149,
+  308, 339,  245, 276,  371, 402,  150, 181,  465, 496,  182, 213,  434, 465,
+  340, 371,  277, 308,  22,  22,   23,  54,   403, 434,  55,  86,   214, 245,
+  87,  118,  309, 340,  372, 403,  119, 150,  246, 277,  466, 497,  151, 182,
+  435, 466,  341, 372,  183, 214,  278, 309,  404, 435,  23,  23,   24,  55,
+  215, 246,  56,  87,   310, 341,  88,  119,  373, 404,  467, 498,  120, 151,
+  247, 278,  436, 467,  152, 183,  342, 373,  279, 310,  405, 436,  184, 215,
+  24,  24,   216, 247,  468, 499,  25,  56,   374, 405,  57,  88,   311, 342,
+  89,  120,  437, 468,  248, 279,  121, 152,  153, 184,  343, 374,  406, 437,
+  280, 311,  185, 216,  469, 500,  375, 406,  217, 248,  25,  25,   312, 343,
+  26,  57,   58,  89,   438, 469,  90,  121,  249, 280,  122, 153,  344, 375,
+  407, 438,  154, 185,  281, 312,  470, 501,  186, 217,  376, 407,  218, 249,
+  313, 344,  439, 470,  26,  26,   27,  58,   59,  90,   91,  122,  250, 281,
+  123, 154,  408, 439,  345, 376,  155, 186,  471, 502,  282, 313,  187, 218,
+  377, 408,  440, 471,  314, 345,  219, 250,  27,  27,   28,  59,   60,  91,
+  92,  123,  251, 282,  409, 440,  346, 377,  124, 155,  472, 503,  156, 187,
+  283, 314,  188, 219,  378, 409,  441, 472,  315, 346,  220, 251,  28,  28,
+  29,  60,   61,  92,   410, 441,  252, 283,  93,  124,  347, 378,  473, 504,
+  125, 156,  284, 315,  157, 188,  442, 473,  379, 410,  189, 220,  316, 347,
+  221, 252,  411, 442,  29,  29,   30,  61,   474, 505,  62,  93,   253, 284,
+  348, 379,  94,  125,  126, 157,  285, 316,  158, 189,  443, 474,  380, 411,
+  190, 221,  317, 348,  222, 253,  412, 443,  475, 506,  30,  30,   31,  62,
+  349, 380,  254, 285,  63,  94,   95,  126,  127, 158,  444, 475,  286, 317,
+  381, 412,  159, 190,  191, 222,  318, 349,  476, 507,  223, 254,  413, 444,
+  350, 381,  255, 286,  445, 476,  287, 318,  382, 413,  319, 350,  477, 508,
+  414, 445,  351, 382,  446, 477,  383, 414,  478, 509,  415, 446,  447, 478,
+  479, 510,  480, 480,  481, 512,  482, 513,  483, 514,  484, 515,  485, 516,
+  512, 512,  513, 544,  486, 517,  514, 545,  515, 546,  487, 518,  516, 547,
+  517, 548,  488, 519,  544, 544,  518, 549,  545, 576,  546, 577,  547, 578,
+  489, 520,  519, 550,  548, 579,  549, 580,  520, 551,  490, 521,  550, 581,
+  576, 576,  577, 608,  578, 609,  521, 552,  579, 610,  551, 582,  491, 522,
+  580, 611,  581, 612,  552, 583,  522, 553,  582, 613,  492, 523,  608, 608,
+  609, 640,  610, 641,  553, 584,  611, 642,  523, 554,  583, 614,  612, 643,
+  554, 585,  493, 524,  584, 615,  613, 644,  524, 555,  614, 645,  640, 640,
+  585, 616,  641, 672,  555, 586,  642, 673,  615, 646,  643, 674,  494, 525,
+  644, 675,  525, 556,  586, 617,  616, 647,  645, 676,  556, 587,  646, 677,
+  495, 526,  617, 648,  587, 618,  672, 672,  526, 557,  673, 704,  674, 705,
+  647, 678,  557, 588,  675, 706,  618, 649,  676, 707,  588, 619,  648, 679,
+  677, 708,  496, 527,  527, 558,  558, 589,  678, 709,  619, 650,  649, 680,
+  704, 704,  589, 620,  705, 736,  679, 710,  706, 737,  707, 738,  650, 681,
+  620, 651,  497, 528,  528, 559,  708, 739,  680, 711,  559, 590,  709, 740,
+  590, 621,  651, 682,  681, 712,  710, 741,  621, 652,  736, 736,  737, 768,
+  529, 560,  711, 742,  498, 529,  560, 591,  738, 769,  682, 713,  652, 683,
+  739, 770,  591, 622,  740, 771,  712, 743,  622, 653,  741, 772,  683, 714,
+  653, 684,  713, 744,  742, 773,  530, 561,  561, 592,  499, 530,  592, 623,
+  623, 654,  743, 774,  768, 768,  769, 800,  684, 715,  714, 745,  770, 801,
+  771, 802,  654, 685,  744, 775,  772, 803,  562, 593,  531, 562,  593, 624,
+  715, 746,  773, 804,  685, 716,  500, 531,  624, 655,  745, 776,  774, 805,
+  655, 686,  716, 747,  775, 806,  746, 777,  800, 800,  801, 832,  686, 717,
+  802, 833,  563, 594,  594, 625,  803, 834,  532, 563,  625, 656,  776, 807,
+  804, 835,  501, 532,  656, 687,  747, 778,  717, 748,  805, 836,  777, 808,
+  687, 718,  806, 837,  748, 779,  595, 626,  564, 595,  626, 657,  718, 749,
+  778, 809,  807, 838,  832, 832,  533, 564,  657, 688,  833, 864,  834, 865,
+  835, 866,  502, 533,  688, 719,  808, 839,  749, 780,  836, 867,  779, 810,
+  719, 750,  837, 868,  809, 840,  596, 627,  627, 658,  565, 596,  658, 689,
+  838, 869,  780, 811,  750, 781,  534, 565,  689, 720,  810, 841,  839, 870,
+  864, 864,  503, 534,  720, 751,  865, 896,  866, 897,  840, 871,  867, 898,
+  781, 812,  811, 842,  628, 659,  868, 899,  751, 782,  597, 628,  659, 690,
+  566, 597,  690, 721,  869, 900,  841, 872,  535, 566,  721, 752,  812, 843,
+  870, 901,  782, 813,  842, 873,  504, 535,  752, 783,  871, 902,  629, 660,
+  660, 691,  896, 896,  897, 928,  598, 629,  691, 722,  813, 844,  898, 929,
+  872, 903,  783, 814,  843, 874,  899, 930,  567, 598,  722, 753,  900, 931,
+  536, 567,  753, 784,  873, 904,  901, 932,  814, 845,  844, 875,  902, 933,
+  505, 536,  784, 815,  661, 692,  630, 661,  692, 723,  874, 905,  599, 630,
+  723, 754,  903, 934,  845, 876,  568, 599,  754, 785,  928, 928,  815, 846,
+  929, 960,  930, 961,  875, 906,  904, 935,  931, 962,  537, 568,  785, 816,
+  932, 963,  905, 936,  662, 693,  693, 724,  846, 877,  933, 964,  876, 907,
+  631, 662,  724, 755,  506, 537,  816, 847,  934, 965,  600, 631,  755, 786,
+  906, 937,  569, 600,  786, 817,  935, 966,  877, 908,  847, 878,  960, 960,
+  907, 938,  961, 992,  936, 967,  538, 569,  817, 848,  962, 993,  694, 725,
+  663, 694,  725, 756,  963, 994,  632, 663,  756, 787,  964, 995,  878, 909,
+  937, 968,  507, 538,  848, 879,  908, 939,  601, 632,  787, 818,  965, 996,
+  966, 997,  570, 601,  818, 849,  938, 969,  879, 910,  909, 940,  967, 998,
+  695, 726,  726, 757,  664, 695,  757, 788,  539, 570,  849, 880,  939, 970,
+  633, 664,  788, 819,  968, 999,  602, 633,  819, 850,  910, 941,  508, 539,
+  880, 911,  969, 1000, 940, 971,  571, 602,  850, 881,  727, 758,  696, 727,
+  758, 789,  970, 1001, 665, 696,  789, 820,  911, 942,  941, 972,  540, 571,
+  881, 912,  634, 665,  820, 851,  971, 1002, 603, 634,  851, 882,  942, 973,
+  509, 540,  912, 943,  728, 759,  759, 790,  972, 1003, 572, 603,  882, 913,
+  697, 728,  790, 821,  666, 697,  821, 852,  943, 974,  635, 666,  852, 883,
+  541, 572,  913, 944,  973, 1004, 604, 635,  883, 914,  760, 791,  729, 760,
+  791, 822,  510, 541,  944, 975,  974, 1005, 698, 729,  822, 853,  573, 604,
+  914, 945,  667, 698,  853, 884,  636, 667,  884, 915,  975, 1006, 542, 573,
+  945, 976,  761, 792,  792, 823,  605, 636,  915, 946,  730, 761,  823, 854,
+  699, 730,  854, 885,  511, 542,  976, 1007, 574, 605,  946, 977,  668, 699,
+  885, 916,  637, 668,  916, 947,  543, 574,  793, 824,  977, 1008, 762, 793,
+  824, 855,  731, 762,  855, 886,  606, 637,  947, 978,  700, 731,  886, 917,
+  669, 700,  917, 948,  575, 606,  978, 1009, 638, 669,  948, 979,  794, 825,
+  825, 856,  763, 794,  856, 887,  732, 763,  887, 918,  607, 638,  979, 1010,
+  701, 732,  918, 949,  670, 701,  949, 980,  826, 857,  795, 826,  857, 888,
+  764, 795,  888, 919,  639, 670,  980, 1011, 733, 764,  919, 950,  702, 733,
+  950, 981,  671, 702,  981, 1012, 827, 858,  858, 889,  796, 827,  889, 920,
+  765, 796,  920, 951,  734, 765,  951, 982,  703, 734,  982, 1013, 859, 890,
+  828, 859,  890, 921,  797, 828,  921, 952,  766, 797,  952, 983,  735, 766,
+  983, 1014, 860, 891,  891, 922,  829, 860,  922, 953,  798, 829,  953, 984,
+  767, 798,  984, 1015, 892, 923,  861, 892,  923, 954,  830, 861,  954, 985,
+  799, 830,  985, 1016, 893, 924,  924, 955,  862, 893,  955, 986,  831, 862,
+  986, 1017, 925, 956,  894, 925,  956, 987,  863, 894,  987, 1018, 926, 957,
+  957, 988,  895, 926,  988, 1019, 958, 989,  927, 958,  989, 1020, 959, 990,
+  990, 1021, 991, 1022, 0,   0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+                qtr_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+  0,   0,    0,   0,    0,   0,    1,   32,   1,   1,    32,  32,   2,   33,
+  33,  64,   34,  65,   2,   2,    64,  64,   3,   34,   65,  96,   35,  66,
+  66,  97,   3,   3,    96,  96,   4,   35,   97,  128,  67,  98,   36,  67,
+  98,  129,  4,   4,    68,  99,   99,  130,  128, 128,  5,   36,   129, 160,
+  37,  68,   130, 161,  100, 131,  69,  100,  131, 162,  5,   5,    160, 160,
+  6,   37,   161, 192,  38,  69,   162, 193,  101, 132,  132, 163,  70,  101,
+  163, 194,  6,   6,    192, 192,  7,   38,   133, 164,  193, 224,  102, 133,
+  164, 195,  39,  70,   194, 225,  71,  102,  195, 226,  134, 165,  165, 196,
+  7,   7,    224, 224,  8,   39,   103, 134,  196, 227,  225, 256,  40,  71,
+  226, 257,  166, 197,  72,  103,  227, 258,  135, 166,  197, 228,  104, 135,
+  228, 259,  8,   8,    256, 256,  9,   40,   257, 288,  41,  72,   167, 198,
+  198, 229,  258, 289,  136, 167,  229, 260,  73,  104,  259, 290,  105, 136,
+  260, 291,  199, 230,  9,   9,    168, 199,  230, 261,  288, 288,  10,  41,
+  289, 320,  42,  73,   290, 321,  137, 168,  261, 292,  74,  105,  291, 322,
+  200, 231,  231, 262,  106, 137,  292, 323,  169, 200,  262, 293,  10,  10,
+  320, 320,  11,  42,   321, 352,  43,  74,   138, 169,  293, 324,  322, 353,
+  232, 263,  75,  106,  201, 232,  263, 294,  323, 354,  170, 201,  294, 325,
+  107, 138,  324, 355,  11,  11,   352, 352,  12,  43,   233, 264,  264, 295,
+  353, 384,  139, 170,  325, 356,  44,  75,   354, 385,  202, 233,  295, 326,
+  76,  107,  355, 386,  171, 202,  326, 357,  108, 139,  356, 387,  265, 296,
+  234, 265,  296, 327,  12,  12,   140, 171,  357, 388,  384, 384,  13,  44,
+  203, 234,  327, 358,  385, 416,  45,  76,   386, 417,  77,  108,  387, 418,
+  172, 203,  358, 389,  266, 297,  297, 328,  109, 140,  235, 266,  328, 359,
+  388, 419,  204, 235,  359, 390,  141, 172,  389, 420,  13,  13,   416, 416,
+  14,  45,   417, 448,  46,  77,   298, 329,  418, 449,  267, 298,  329, 360,
+  78,  109,  173, 204,  390, 421,  419, 450,  236, 267,  360, 391,  110, 141,
+  420, 451,  205, 236,  391, 422,  142, 173,  299, 330,  330, 361,  421, 452,
+  14,  14,   268, 299,  361, 392,  448, 448,  15,  46,   449, 480,  47,  78,
+  450, 481,  174, 205,  422, 453,  237, 268,  392, 423,  79,  110,  451, 482,
+  111, 142,  452, 483,  331, 362,  300, 331,  362, 393,  206, 237,  423, 454,
+  143, 174,  269, 300,  393, 424,  453, 484,  238, 269,  424, 455,  175, 206,
+  454, 485,  332, 363,  363, 394,  301, 332,  394, 425,  207, 238,  455, 486,
+  270, 301,  425, 456,  364, 395,  239, 270,  456, 487,  333, 364,  395, 426,
+  302, 333,  426, 457,  271, 302,  457, 488,  365, 396,  396, 427,  334, 365,
+  427, 458,  303, 334,  458, 489,  397, 428,  366, 397,  428, 459,  335, 366,
+  459, 490,  398, 429,  429, 460,  367, 398,  460, 491,  430, 461,  399, 430,
+  461, 492,  431, 462,  462, 493,  463, 494,  15,  15,   480, 480,  16,  47,
+  481, 512,  48,  79,   482, 513,  80,  111,  483, 514,  112, 143,  484, 515,
+  144, 175,  485, 516,  16,  16,   512, 512,  17,  48,   513, 544,  176, 207,
+  486, 517,  49,  80,   514, 545,  81,  112,  515, 546,  113, 144,  208, 239,
+  487, 518,  516, 547,  145, 176,  517, 548,  240, 271,  488, 519,  17,  17,
+  544, 544,  18,  49,   177, 208,  518, 549,  545, 576,  50,  81,   546, 577,
+  82,  113,  547, 578,  272, 303,  489, 520,  209, 240,  519, 550,  114, 145,
+  548, 579,  146, 177,  549, 580,  241, 272,  520, 551,  304, 335,  490, 521,
+  178, 209,  550, 581,  18,  18,   576, 576,  19,  50,   577, 608,  51,  82,
+  578, 609,  83,  114,  273, 304,  521, 552,  579, 610,  210, 241,  551, 582,
+  115, 146,  336, 367,  491, 522,  580, 611,  147, 178,  581, 612,  242, 273,
+  552, 583,  305, 336,  522, 553,  179, 210,  582, 613,  19,  19,   368, 399,
+  492, 523,  608, 608,  20,  51,   609, 640,  52,  83,   610, 641,  274, 305,
+  553, 584,  84,  115,  611, 642,  211, 242,  337, 368,  523, 554,  583, 614,
+  116, 147,  612, 643,  306, 337,  554, 585,  148, 179,  243, 274,  400, 431,
+  493, 524,  584, 615,  613, 644,  369, 400,  524, 555,  180, 211,  614, 645,
+  20,  20,   640, 640,  21,  52,   275, 306,  585, 616,  641, 672,  53,  84,
+  338, 369,  555, 586,  642, 673,  212, 243,  615, 646,  85,  116,  643, 674,
+  432, 463,  494, 525,  117, 148,  644, 675,  401, 432,  525, 556,  307, 338,
+  586, 617,  244, 275,  616, 647,  149, 180,  645, 676,  370, 401,  556, 587,
+  181, 212,  646, 677,  276, 307,  464, 495,  495, 526,  617, 648,  339, 370,
+  587, 618,  21,  21,   672, 672,  22,  53,   433, 464,  526, 557,  673, 704,
+  54,  85,   674, 705,  213, 244,  647, 678,  86,  117,  402, 433,  557, 588,
+  675, 706,  118, 149,  308, 339,  618, 649,  676, 707,  245, 276,  371, 402,
+  588, 619,  648, 679,  150, 181,  677, 708,  496, 527,  465, 496,  527, 558,
+  182, 213,  434, 465,  558, 589,  678, 709,  340, 371,  619, 650,  277, 308,
+  649, 680,  22,  22,   704, 704,  23,  54,   403, 434,  589, 620,  705, 736,
+  55,  86,   214, 245,  679, 710,  706, 737,  87,  118,  707, 738,  309, 340,
+  650, 681,  372, 403,  620, 651,  119, 150,  497, 528,  528, 559,  708, 739,
+  246, 277,  680, 711,  466, 497,  559, 590,  151, 182,  709, 740,  435, 466,
+  590, 621,  341, 372,  651, 682,  183, 214,  278, 309,  681, 712,  710, 741,
+  404, 435,  621, 652,  23,  23,   736, 736,  24,  55,   737, 768,  215, 246,
+  529, 560,  711, 742,  56,  87,   498, 529,  560, 591,  738, 769,  310, 341,
+  682, 713,  88,  119,  373, 404,  652, 683,  739, 770,  467, 498,  591, 622,
+  120, 151,  740, 771,  247, 278,  712, 743,  436, 467,  622, 653,  152, 183,
+  741, 772,  342, 373,  683, 714,  279, 310,  405, 436,  653, 684,  713, 744,
+  184, 215,  742, 773,  530, 561,  561, 592,  499, 530,  592, 623,  24,  24,
+  216, 247,  468, 499,  623, 654,  743, 774,  768, 768,  25,  56,   769, 800,
+  374, 405,  684, 715,  57,  88,   311, 342,  714, 745,  770, 801,  89,  120,
+  771, 802,  437, 468,  654, 685,  248, 279,  744, 775,  121, 152,  772, 803,
+  562, 593,  153, 184,  343, 374,  531, 562,  593, 624,  715, 746,  773, 804,
+  406, 437,  685, 716,  500, 531,  624, 655,  280, 311,  745, 776,  185, 216,
+  774, 805,  469, 500,  655, 686,  375, 406,  716, 747,  217, 248,  775, 806,
+  25,  25,   312, 343,  746, 777,  800, 800,  26,  57,   801, 832,  58,  89,
+  438, 469,  686, 717,  802, 833,  90,  121,  563, 594,  594, 625,  803, 834,
+  249, 280,  532, 563,  625, 656,  776, 807,  122, 153,  804, 835,  344, 375,
+  501, 532,  656, 687,  747, 778,  407, 438,  717, 748,  154, 185,  805, 836,
+  281, 312,  777, 808,  470, 501,  687, 718,  186, 217,  806, 837,  376, 407,
+  748, 779,  595, 626,  564, 595,  626, 657,  218, 249,  313, 344,  439, 470,
+  718, 749,  778, 809,  807, 838,  26,  26,   832, 832,  27,  58,   533, 564,
+  657, 688,  833, 864,  59,  90,   834, 865,  91,  122,  835, 866,  250, 281,
+  502, 533,  688, 719,  808, 839,  123, 154,  408, 439,  749, 780,  836, 867,
+  345, 376,  779, 810,  155, 186,  471, 502,  719, 750,  837, 868,  282, 313,
+  809, 840,  596, 627,  627, 658,  187, 218,  565, 596,  658, 689,  838, 869,
+  377, 408,  780, 811,  440, 471,  750, 781,  534, 565,  689, 720,  314, 345,
+  810, 841,  219, 250,  839, 870,  27,  27,   864, 864,  28,  59,   503, 534,
+  720, 751,  865, 896,  60,  91,   866, 897,  92,  123,  251, 282,  840, 871,
+  867, 898,  409, 440,  781, 812,  346, 377,  811, 842,  124, 155,  628, 659,
+  868, 899,  472, 503,  751, 782,  597, 628,  659, 690,  566, 597,  690, 721,
+  156, 187,  869, 900,  283, 314,  841, 872,  535, 566,  721, 752,  188, 219,
+  378, 409,  812, 843,  870, 901,  441, 472,  782, 813,  315, 346,  842, 873,
+  504, 535,  752, 783,  220, 251,  871, 902,  28,  28,   629, 660,  660, 691,
+  896, 896,  29,  60,   897, 928,  61,  92,   410, 441,  598, 629,  691, 722,
+  813, 844,  898, 929,  252, 283,  872, 903,  93,  124,  347, 378,  473, 504,
+  783, 814,  843, 874,  899, 930,  567, 598,  722, 753,  125, 156,  900, 931,
+  284, 315,  536, 567,  753, 784,  873, 904,  157, 188,  901, 932,  442, 473,
+  814, 845,  379, 410,  844, 875,  189, 220,  902, 933,  505, 536,  784, 815,
+  661, 692,  316, 347,  630, 661,  692, 723,  874, 905,  221, 252,  599, 630,
+  723, 754,  903, 934,  411, 442,  845, 876,  29,  29,   568, 599,  754, 785,
+  928, 928,  30,  61,   474, 505,  815, 846,  929, 960,  62,  93,   930, 961,
+  253, 284,  348, 379,  875, 906,  904, 935,  94,  125,  931, 962,  537, 568,
+  785, 816,  126, 157,  932, 963,  285, 316,  905, 936,  158, 189,  443, 474,
+  662, 693,  693, 724,  846, 877,  933, 964,  380, 411,  876, 907,  631, 662,
+  724, 755,  506, 537,  816, 847,  190, 221,  934, 965,  600, 631,  755, 786,
+  317, 348,  906, 937,  222, 253,  569, 600,  786, 817,  935, 966,  412, 443,
+  877, 908,  475, 506,  847, 878,  30,  30,   960, 960,  31,  62,   349, 380,
+  907, 938,  961, 992,  254, 285,  936, 967,  63,  94,   538, 569,  817, 848,
+  962, 993,  694, 725,  95,  126,  663, 694,  725, 756,  963, 994,  632, 663,
+  756, 787,  127, 158,  964, 995,  444, 475,  878, 909,  286, 317,  937, 968,
+  381, 412,  507, 538,  848, 879,  908, 939,  159, 190,  601, 632,  787, 818,
+  965, 996,  191, 222,  966, 997,  318, 349,  570, 601,  818, 849,  938, 969,
+  476, 507,  879, 910,  223, 254,  413, 444,  909, 940,  967, 998,  695, 726,
+  726, 757,  664, 695,  757, 788,  539, 570,  849, 880,  350, 381,  939, 970,
+  255, 286,  633, 664,  788, 819,  968, 999,  445, 476,  602, 633,  819, 850,
+  910, 941,  508, 539,  880, 911,  287, 318,  969, 1000, 382, 413,  940, 971,
+  571, 602,  850, 881,  727, 758,  696, 727,  758, 789,  319, 350,  970, 1001,
+  477, 508,  665, 696,  789, 820,  911, 942,  414, 445,  941, 972,  540, 571,
+  881, 912,  634, 665,  820, 851,  351, 382,  971, 1002, 603, 634,  851, 882,
+  446, 477,  942, 973,  509, 540,  912, 943,  383, 414,  728, 759,  759, 790,
+  972, 1003, 572, 603,  882, 913,  697, 728,  790, 821,  666, 697,  821, 852,
+  478, 509,  943, 974,  635, 666,  852, 883,  415, 446,  541, 572,  913, 944,
+  973, 1004, 604, 635,  883, 914,  760, 791,  729, 760,  791, 822,  510, 541,
+  944, 975,  447, 478,  974, 1005, 698, 729,  822, 853,  573, 604,  914, 945,
+  667, 698,  853, 884,  636, 667,  884, 915,  479, 510,  975, 1006, 542, 573,
+  945, 976,  761, 792,  792, 823,  605, 636,  915, 946,  730, 761,  823, 854,
+  699, 730,  854, 885,  511, 542,  976, 1007, 574, 605,  946, 977,  668, 699,
+  885, 916,  637, 668,  916, 947,  543, 574,  793, 824,  977, 1008, 762, 793,
+  824, 855,  731, 762,  855, 886,  606, 637,  947, 978,  700, 731,  886, 917,
+  669, 700,  917, 948,  575, 606,  978, 1009, 638, 669,  948, 979,  794, 825,
+  825, 856,  763, 794,  856, 887,  732, 763,  887, 918,  607, 638,  979, 1010,
+  701, 732,  918, 949,  670, 701,  949, 980,  826, 857,  795, 826,  857, 888,
+  764, 795,  888, 919,  639, 670,  980, 1011, 733, 764,  919, 950,  702, 733,
+  950, 981,  671, 702,  981, 1012, 827, 858,  858, 889,  796, 827,  889, 920,
+  765, 796,  920, 951,  734, 765,  951, 982,  703, 734,  982, 1013, 859, 890,
+  828, 859,  890, 921,  797, 828,  921, 952,  766, 797,  952, 983,  735, 766,
+  983, 1014, 860, 891,  891, 922,  829, 860,  922, 953,  798, 829,  953, 984,
+  767, 798,  984, 1015, 892, 923,  861, 892,  923, 954,  830, 861,  954, 985,
+  799, 830,  985, 1016, 893, 924,  924, 955,  862, 893,  955, 986,  831, 862,
+  986, 1017, 925, 956,  894, 925,  956, 987,  863, 894,  987, 1018, 926, 957,
+  957, 988,  895, 926,  988, 1019, 958, 989,  927, 958,  989, 1020, 959, 990,
+  990, 1021, 991, 1022, 0,   0
+};
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_4x4[16]) = {
+  0, 2, 5, 8, 1, 3, 9, 12, 4, 7, 11, 14, 6, 10, 13, 15,
+};
+
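+/* The iscan tables are the inverses of the corresponding scan orders:
+ * iscan[raster_position] gives that coefficient's rank in scan order, so
+ * iscan[scan[i]] == i for all i.  An illustrative self-check, assuming the
+ * matching default_scan_4x4 table defined earlier in this file:
+ *
+ *   for (int i = 0; i < 16; ++i)
+ *     assert(vp10_default_iscan_4x4[default_scan_4x4[i]] == i);
+ */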
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_4x4[16]) = {
+  0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_4x4[16]) = {
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
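+/* mcol/mrow are the pure column and row scans used with the 1-D transform
+ * types added by CONFIG_EXT_TX.  The row scan visits coefficients in raster
+ * order, so its inverse above is the identity permutation; the column
+ * scan's inverse maps each raster position (row, col) to its column-major
+ * rank col * 4 + row. */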
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_col_iscan_4x4[16]) = {
+  0, 3, 7, 11, 1, 5, 9, 12, 2, 6, 10, 14, 4, 8, 13, 15,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_row_iscan_4x4[16]) = {
+  0, 1, 3, 5, 2, 4, 6, 9, 7, 8, 11, 13, 10, 12, 14, 15,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_4x8[32]) = {
+  0,  1,  4,  9,  2,  3,  6,  11, 5,  7,  8,  13, 10, 12, 14, 17,
+  15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_4x8[32]) = {
+  0, 8,  16, 24, 1, 9,  17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
+  4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_4x8[32]) = {
+  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_8x4[32]) = {
+  0, 1, 4, 9,  15, 19, 24, 28, 2,  3,  6,  11, 16, 21, 25, 29,
+  5, 7, 8, 13, 18, 22, 26, 30, 10, 12, 14, 17, 20, 23, 27, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_8x4[32]) = {
+  0, 4, 8,  12, 16, 20, 24, 28, 1, 5, 9,  13, 17, 21, 25, 29,
+  2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_8x4[32]) = {
+  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};
+#endif  // CONFIG_EXT_TX
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_8x8[64]) = {
+  0, 8,  16, 24, 32, 40, 48, 56, 1, 9,  17, 25, 33, 41, 49, 57,
+  2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59,
+  4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61,
+  6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_8x8[64]) = {
+  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+};
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_col_iscan_8x8[64]) = {
+  0,  3,  8,  15, 22, 32, 40, 47, 1,  5,  11, 18, 26, 34, 44, 51,
+  2,  7,  13, 20, 28, 38, 46, 54, 4,  10, 16, 24, 31, 41, 50, 56,
+  6,  12, 21, 27, 35, 43, 52, 58, 9,  17, 25, 33, 39, 48, 55, 60,
+  14, 23, 30, 37, 45, 53, 59, 62, 19, 29, 36, 42, 49, 57, 61, 63,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_row_iscan_8x8[64]) = {
+  0,  1,  2,  5,  8,  12, 19, 24, 3,  4,  7,  10, 15, 20, 30, 39,
+  6,  9,  13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52,
+  18, 22, 25, 31, 35, 41, 50, 57, 26, 29, 33, 38, 43, 49, 55, 59,
+  32, 36, 42, 47, 51, 54, 60, 61, 40, 45, 48, 53, 56, 58, 62, 63,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_8x8[64]) = {
+  0,  2,  5,  9,  14, 22, 31, 37, 1,  4,  8,  13, 19, 26, 38, 44,
+  3,  6,  10, 17, 24, 30, 42, 49, 7,  11, 15, 21, 29, 36, 47, 53,
+  12, 16, 20, 27, 34, 43, 52, 57, 18, 23, 28, 35, 41, 48, 56, 60,
+  25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63,
+};
+
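+/* Given any iscan table, the forward scan order can be recovered by
+ * inverting the permutation (illustrative sketch):
+ *
+ *   int16_t scan[64];
+ *   for (int p = 0; p < 64; ++p) scan[vp10_default_iscan_8x8[p]] = p;
+ */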
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_8x16[128]) = {
+  0,  1,  3,   6,   10,  15,  21,  28,  2,  4,   7,   11,  16,  22,  29,  36,
+  5,  8,  12,  17,  23,  30,  37,  44,  9,  13,  18,  24,  31,  38,  45,  52,
+  14, 19, 25,  32,  39,  46,  53,  60,  20, 26,  33,  40,  47,  54,  61,  68,
+  27, 34, 41,  48,  55,  62,  69,  76,  35, 42,  49,  56,  63,  70,  77,  84,
+  43, 50, 57,  64,  71,  78,  85,  92,  51, 58,  65,  72,  79,  86,  93,  100,
+  59, 66, 73,  80,  87,  94,  101, 107, 67, 74,  81,  88,  95,  102, 108, 113,
+  75, 82, 89,  96,  103, 109, 114, 118, 83, 90,  97,  104, 110, 115, 119, 122,
+  91, 98, 105, 111, 116, 120, 123, 125, 99, 106, 112, 117, 121, 124, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_16x8[128]) = {
+  0,  1,  3,  6,  10, 15, 21, 28, 36, 44,  52,  60,  68,  76,  84,  92,
+  2,  4,  7,  11, 16, 22, 29, 37, 45, 53,  61,  69,  77,  85,  93,  100,
+  5,  8,  12, 17, 23, 30, 38, 46, 54, 62,  70,  78,  86,  94,  101, 107,
+  9,  13, 18, 24, 31, 39, 47, 55, 63, 71,  79,  87,  95,  102, 108, 113,
+  14, 19, 25, 32, 40, 48, 56, 64, 72, 80,  88,  96,  103, 109, 114, 118,
+  20, 26, 33, 41, 49, 57, 65, 73, 81, 89,  97,  104, 110, 115, 119, 122,
+  27, 34, 42, 50, 58, 66, 74, 82, 90, 98,  105, 111, 116, 120, 123, 125,
+  35, 43, 51, 59, 67, 75, 83, 91, 99, 106, 112, 117, 121, 124, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_8x16[128]) = {
+  0,  16, 32, 48, 64, 80, 96,  112, 1,  17, 33, 49, 65, 81, 97,  113,
+  2,  18, 34, 50, 66, 82, 98,  114, 3,  19, 35, 51, 67, 83, 99,  115,
+  4,  20, 36, 52, 68, 84, 100, 116, 5,  21, 37, 53, 69, 85, 101, 117,
+  6,  22, 38, 54, 70, 86, 102, 118, 7,  23, 39, 55, 71, 87, 103, 119,
+  8,  24, 40, 56, 72, 88, 104, 120, 9,  25, 41, 57, 73, 89, 105, 121,
+  10, 26, 42, 58, 74, 90, 106, 122, 11, 27, 43, 59, 75, 91, 107, 123,
+  12, 28, 44, 60, 76, 92, 108, 124, 13, 29, 45, 61, 77, 93, 109, 125,
+  14, 30, 46, 62, 78, 94, 110, 126, 15, 31, 47, 63, 79, 95, 111, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_16x8[128]) = {
+  0, 8,  16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96,  104, 112, 120,
+  1, 9,  17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97,  105, 113, 121,
+  2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98,  106, 114, 122,
+  3, 11, 19, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99,  107, 115, 123,
+  4, 12, 20, 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124,
+  5, 13, 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125,
+  6, 14, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126,
+  7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_8x16[128]) = {
+  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,
+  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
+  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+  120, 121, 122, 123, 124, 125, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_16x8[128]) = {
+  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,
+  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
+  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+  120, 121, 122, 123, 124, 125, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_16x32[512]) = {
+  0,   1,   3,   6,   10,  15,  21,  28,  36,  45,  55,  66,  78,  91,  105,
+  120, 2,   4,   7,   11,  16,  22,  29,  37,  46,  56,  67,  79,  92,  106,
+  121, 136, 5,   8,   12,  17,  23,  30,  38,  47,  57,  68,  80,  93,  107,
+  122, 137, 152, 9,   13,  18,  24,  31,  39,  48,  58,  69,  81,  94,  108,
+  123, 138, 153, 168, 14,  19,  25,  32,  40,  49,  59,  70,  82,  95,  109,
+  124, 139, 154, 169, 184, 20,  26,  33,  41,  50,  60,  71,  83,  96,  110,
+  125, 140, 155, 170, 185, 200, 27,  34,  42,  51,  61,  72,  84,  97,  111,
+  126, 141, 156, 171, 186, 201, 216, 35,  43,  52,  62,  73,  85,  98,  112,
+  127, 142, 157, 172, 187, 202, 217, 232, 44,  53,  63,  74,  86,  99,  113,
+  128, 143, 158, 173, 188, 203, 218, 233, 248, 54,  64,  75,  87,  100, 114,
+  129, 144, 159, 174, 189, 204, 219, 234, 249, 264, 65,  76,  88,  101, 115,
+  130, 145, 160, 175, 190, 205, 220, 235, 250, 265, 280, 77,  89,  102, 116,
+  131, 146, 161, 176, 191, 206, 221, 236, 251, 266, 281, 296, 90,  103, 117,
+  132, 147, 162, 177, 192, 207, 222, 237, 252, 267, 282, 297, 312, 104, 118,
+  133, 148, 163, 178, 193, 208, 223, 238, 253, 268, 283, 298, 313, 328, 119,
+  134, 149, 164, 179, 194, 209, 224, 239, 254, 269, 284, 299, 314, 329, 344,
+  135, 150, 165, 180, 195, 210, 225, 240, 255, 270, 285, 300, 315, 330, 345,
+  360, 151, 166, 181, 196, 211, 226, 241, 256, 271, 286, 301, 316, 331, 346,
+  361, 376, 167, 182, 197, 212, 227, 242, 257, 272, 287, 302, 317, 332, 347,
+  362, 377, 392, 183, 198, 213, 228, 243, 258, 273, 288, 303, 318, 333, 348,
+  363, 378, 393, 407, 199, 214, 229, 244, 259, 274, 289, 304, 319, 334, 349,
+  364, 379, 394, 408, 421, 215, 230, 245, 260, 275, 290, 305, 320, 335, 350,
+  365, 380, 395, 409, 422, 434, 231, 246, 261, 276, 291, 306, 321, 336, 351,
+  366, 381, 396, 410, 423, 435, 446, 247, 262, 277, 292, 307, 322, 337, 352,
+  367, 382, 397, 411, 424, 436, 447, 457, 263, 278, 293, 308, 323, 338, 353,
+  368, 383, 398, 412, 425, 437, 448, 458, 467, 279, 294, 309, 324, 339, 354,
+  369, 384, 399, 413, 426, 438, 449, 459, 468, 476, 295, 310, 325, 340, 355,
+  370, 385, 400, 414, 427, 439, 450, 460, 469, 477, 484, 311, 326, 341, 356,
+  371, 386, 401, 415, 428, 440, 451, 461, 470, 478, 485, 491, 327, 342, 357,
+  372, 387, 402, 416, 429, 441, 452, 462, 471, 479, 486, 492, 497, 343, 358,
+  373, 388, 403, 417, 430, 442, 453, 463, 472, 480, 487, 493, 498, 502, 359,
+  374, 389, 404, 418, 431, 443, 454, 464, 473, 481, 488, 494, 499, 503, 506,
+  375, 390, 405, 419, 432, 444, 455, 465, 474, 482, 489, 495, 500, 504, 507,
+  509, 391, 406, 420, 433, 445, 456, 466, 475, 483, 490, 496, 501, 505, 508,
+  510, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_32x16[512]) = {
+  0,   1,   3,   6,   10,  15,  21,  28,  36,  45,  55,  66,  78,  91,  105,
+  120, 136, 152, 168, 184, 200, 216, 232, 248, 264, 280, 296, 312, 328, 344,
+  360, 376, 2,   4,   7,   11,  16,  22,  29,  37,  46,  56,  67,  79,  92,
+  106, 121, 137, 153, 169, 185, 201, 217, 233, 249, 265, 281, 297, 313, 329,
+  345, 361, 377, 392, 5,   8,   12,  17,  23,  30,  38,  47,  57,  68,  80,
+  93,  107, 122, 138, 154, 170, 186, 202, 218, 234, 250, 266, 282, 298, 314,
+  330, 346, 362, 378, 393, 407, 9,   13,  18,  24,  31,  39,  48,  58,  69,
+  81,  94,  108, 123, 139, 155, 171, 187, 203, 219, 235, 251, 267, 283, 299,
+  315, 331, 347, 363, 379, 394, 408, 421, 14,  19,  25,  32,  40,  49,  59,
+  70,  82,  95,  109, 124, 140, 156, 172, 188, 204, 220, 236, 252, 268, 284,
+  300, 316, 332, 348, 364, 380, 395, 409, 422, 434, 20,  26,  33,  41,  50,
+  60,  71,  83,  96,  110, 125, 141, 157, 173, 189, 205, 221, 237, 253, 269,
+  285, 301, 317, 333, 349, 365, 381, 396, 410, 423, 435, 446, 27,  34,  42,
+  51,  61,  72,  84,  97,  111, 126, 142, 158, 174, 190, 206, 222, 238, 254,
+  270, 286, 302, 318, 334, 350, 366, 382, 397, 411, 424, 436, 447, 457, 35,
+  43,  52,  62,  73,  85,  98,  112, 127, 143, 159, 175, 191, 207, 223, 239,
+  255, 271, 287, 303, 319, 335, 351, 367, 383, 398, 412, 425, 437, 448, 458,
+  467, 44,  53,  63,  74,  86,  99,  113, 128, 144, 160, 176, 192, 208, 224,
+  240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 399, 413, 426, 438, 449,
+  459, 468, 476, 54,  64,  75,  87,  100, 114, 129, 145, 161, 177, 193, 209,
+  225, 241, 257, 273, 289, 305, 321, 337, 353, 369, 385, 400, 414, 427, 439,
+  450, 460, 469, 477, 484, 65,  76,  88,  101, 115, 130, 146, 162, 178, 194,
+  210, 226, 242, 258, 274, 290, 306, 322, 338, 354, 370, 386, 401, 415, 428,
+  440, 451, 461, 470, 478, 485, 491, 77,  89,  102, 116, 131, 147, 163, 179,
+  195, 211, 227, 243, 259, 275, 291, 307, 323, 339, 355, 371, 387, 402, 416,
+  429, 441, 452, 462, 471, 479, 486, 492, 497, 90,  103, 117, 132, 148, 164,
+  180, 196, 212, 228, 244, 260, 276, 292, 308, 324, 340, 356, 372, 388, 403,
+  417, 430, 442, 453, 463, 472, 480, 487, 493, 498, 502, 104, 118, 133, 149,
+  165, 181, 197, 213, 229, 245, 261, 277, 293, 309, 325, 341, 357, 373, 389,
+  404, 418, 431, 443, 454, 464, 473, 481, 488, 494, 499, 503, 506, 119, 134,
+  150, 166, 182, 198, 214, 230, 246, 262, 278, 294, 310, 326, 342, 358, 374,
+  390, 405, 419, 432, 444, 455, 465, 474, 482, 489, 495, 500, 504, 507, 509,
+  135, 151, 167, 183, 199, 215, 231, 247, 263, 279, 295, 311, 327, 343, 359,
+  375, 391, 406, 420, 433, 445, 456, 466, 475, 483, 490, 496, 501, 505, 508,
+  510, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_16x32[512]) = {
+  0,  32, 64, 96,  128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480,
+  1,  33, 65, 97,  129, 161, 193, 225, 257, 289, 321, 353, 385, 417, 449, 481,
+  2,  34, 66, 98,  130, 162, 194, 226, 258, 290, 322, 354, 386, 418, 450, 482,
+  3,  35, 67, 99,  131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483,
+  4,  36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, 388, 420, 452, 484,
+  5,  37, 69, 101, 133, 165, 197, 229, 261, 293, 325, 357, 389, 421, 453, 485,
+  6,  38, 70, 102, 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486,
+  7,  39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, 455, 487,
+  8,  40, 72, 104, 136, 168, 200, 232, 264, 296, 328, 360, 392, 424, 456, 488,
+  9,  41, 73, 105, 137, 169, 201, 233, 265, 297, 329, 361, 393, 425, 457, 489,
+  10, 42, 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490,
+  11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, 395, 427, 459, 491,
+  12, 44, 76, 108, 140, 172, 204, 236, 268, 300, 332, 364, 396, 428, 460, 492,
+  13, 45, 77, 109, 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493,
+  14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, 462, 494,
+  15, 47, 79, 111, 143, 175, 207, 239, 271, 303, 335, 367, 399, 431, 463, 495,
+  16, 48, 80, 112, 144, 176, 208, 240, 272, 304, 336, 368, 400, 432, 464, 496,
+  17, 49, 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497,
+  18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, 402, 434, 466, 498,
+  19, 51, 83, 115, 147, 179, 211, 243, 275, 307, 339, 371, 403, 435, 467, 499,
+  20, 52, 84, 116, 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500,
+  21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, 469, 501,
+  22, 54, 86, 118, 150, 182, 214, 246, 278, 310, 342, 374, 406, 438, 470, 502,
+  23, 55, 87, 119, 151, 183, 215, 247, 279, 311, 343, 375, 407, 439, 471, 503,
+  24, 56, 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504,
+  25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, 409, 441, 473, 505,
+  26, 58, 90, 122, 154, 186, 218, 250, 282, 314, 346, 378, 410, 442, 474, 506,
+  27, 59, 91, 123, 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507,
+  28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, 476, 508,
+  29, 61, 93, 125, 157, 189, 221, 253, 285, 317, 349, 381, 413, 445, 477, 509,
+  30, 62, 94, 126, 158, 190, 222, 254, 286, 318, 350, 382, 414, 446, 478, 510,
+  31, 63, 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_32x16[512]) = {
+  0,   16,  32,  48,  64,  80,  96,  112, 128, 144, 160, 176, 192, 208, 224,
+  240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464,
+  480, 496, 1,   17,  33,  49,  65,  81,  97,  113, 129, 145, 161, 177, 193,
+  209, 225, 241, 257, 273, 289, 305, 321, 337, 353, 369, 385, 401, 417, 433,
+  449, 465, 481, 497, 2,   18,  34,  50,  66,  82,  98,  114, 130, 146, 162,
+  178, 194, 210, 226, 242, 258, 274, 290, 306, 322, 338, 354, 370, 386, 402,
+  418, 434, 450, 466, 482, 498, 3,   19,  35,  51,  67,  83,  99,  115, 131,
+  147, 163, 179, 195, 211, 227, 243, 259, 275, 291, 307, 323, 339, 355, 371,
+  387, 403, 419, 435, 451, 467, 483, 499, 4,   20,  36,  52,  68,  84,  100,
+  116, 132, 148, 164, 180, 196, 212, 228, 244, 260, 276, 292, 308, 324, 340,
+  356, 372, 388, 404, 420, 436, 452, 468, 484, 500, 5,   21,  37,  53,  69,
+  85,  101, 117, 133, 149, 165, 181, 197, 213, 229, 245, 261, 277, 293, 309,
+  325, 341, 357, 373, 389, 405, 421, 437, 453, 469, 485, 501, 6,   22,  38,
+  54,  70,  86,  102, 118, 134, 150, 166, 182, 198, 214, 230, 246, 262, 278,
+  294, 310, 326, 342, 358, 374, 390, 406, 422, 438, 454, 470, 486, 502, 7,
+  23,  39,  55,  71,  87,  103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
+  263, 279, 295, 311, 327, 343, 359, 375, 391, 407, 423, 439, 455, 471, 487,
+  503, 8,   24,  40,  56,  72,  88,  104, 120, 136, 152, 168, 184, 200, 216,
+  232, 248, 264, 280, 296, 312, 328, 344, 360, 376, 392, 408, 424, 440, 456,
+  472, 488, 504, 9,   25,  41,  57,  73,  89,  105, 121, 137, 153, 169, 185,
+  201, 217, 233, 249, 265, 281, 297, 313, 329, 345, 361, 377, 393, 409, 425,
+  441, 457, 473, 489, 505, 10,  26,  42,  58,  74,  90,  106, 122, 138, 154,
+  170, 186, 202, 218, 234, 250, 266, 282, 298, 314, 330, 346, 362, 378, 394,
+  410, 426, 442, 458, 474, 490, 506, 11,  27,  43,  59,  75,  91,  107, 123,
+  139, 155, 171, 187, 203, 219, 235, 251, 267, 283, 299, 315, 331, 347, 363,
+  379, 395, 411, 427, 443, 459, 475, 491, 507, 12,  28,  44,  60,  76,  92,
+  108, 124, 140, 156, 172, 188, 204, 220, 236, 252, 268, 284, 300, 316, 332,
+  348, 364, 380, 396, 412, 428, 444, 460, 476, 492, 508, 13,  29,  45,  61,
+  77,  93,  109, 125, 141, 157, 173, 189, 205, 221, 237, 253, 269, 285, 301,
+  317, 333, 349, 365, 381, 397, 413, 429, 445, 461, 477, 493, 509, 14,  30,
+  46,  62,  78,  94,  110, 126, 142, 158, 174, 190, 206, 222, 238, 254, 270,
+  286, 302, 318, 334, 350, 366, 382, 398, 414, 430, 446, 462, 478, 494, 510,
+  15,  31,  47,  63,  79,  95,  111, 127, 143, 159, 175, 191, 207, 223, 239,
+  255, 271, 287, 303, 319, 335, 351, 367, 383, 399, 415, 431, 447, 463, 479,
+  495, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_16x32[512]) = {
+  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,
+  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
+  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+  120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+  135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+  150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+  165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+  180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+  195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+  210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+  225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+  240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+  255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
+  270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
+  285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
+  300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
+  315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
+  330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
+  345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
+  360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
+  375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
+  390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
+  405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
+  420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
+  435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
+  450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
+  465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
+  480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
+  495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
+  510, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_32x16[512]) = {
+  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,
+  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
+  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+  120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+  135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+  150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+  165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+  180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+  195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+  210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+  225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+  240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+  255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
+  270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
+  285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
+  300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
+  315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
+  330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
+  345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
+  360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
+  375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
+  390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
+  405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
+  420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
+  435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
+  450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
+  465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
+  480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
+  495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
+  510, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_16x16[256]) = {
+  0,  16, 32, 48, 64, 80, 96,  112, 128, 144, 160, 176, 192, 208, 224, 240,
+  1,  17, 33, 49, 65, 81, 97,  113, 129, 145, 161, 177, 193, 209, 225, 241,
+  2,  18, 34, 50, 66, 82, 98,  114, 130, 146, 162, 178, 194, 210, 226, 242,
+  3,  19, 35, 51, 67, 83, 99,  115, 131, 147, 163, 179, 195, 211, 227, 243,
+  4,  20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244,
+  5,  21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245,
+  6,  22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246,
+  7,  23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
+  8,  24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248,
+  9,  25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249,
+  10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250,
+  11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251,
+  12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252,
+  13, 29, 45, 61, 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253,
+  14, 30, 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254,
+  15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_16x16[256]) = {
+  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,
+  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
+  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+  120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+  135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+  150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+  165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+  180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+  195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+  210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+  225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+  240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+  255,
+};
+#endif  // CONFIG_EXT_TX
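+// The mrow/mcol tables above all follow a closed form; a hypothetical
+// generator (illustration only -- this function does not exist in the
+// codebase) would be:
+#if 0
+static void build_aligned_iscans(int16_t *mrow, int16_t *mcol, int w, int h) {
+  int r, c;
+  for (r = 0; r < h; ++r) {
+    for (c = 0; c < w; ++c) {
+      mrow[r * w + c] = (int16_t)(r * w + c);  // row scan: raster (identity)
+      mcol[r * w + c] = (int16_t)(c * h + r);  // column scan: top to bottom
+    }
+  }
+}
+#endif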
+
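+// Unlike the strict mcol/mrow scans above, the col/row scans below are only
+// column- and row-biased: the order drifts across the block. In VP9 these
+// were selected for the hybrid ADST/DCT transform types.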
+DECLARE_ALIGNED(16, static const int16_t, vp10_col_iscan_16x16[256]) = {
+  0,  4,  11,  20,  31,  43,  59,  75,  85,  109, 130, 150, 165, 181, 195, 198,
+  1,  6,  14,  23,  34,  47,  64,  81,  95,  114, 135, 153, 171, 188, 201, 212,
+  2,  8,  16,  25,  38,  52,  67,  83,  101, 116, 136, 157, 172, 190, 205, 216,
+  3,  10, 18,  29,  41,  55,  71,  89,  103, 119, 141, 159, 176, 194, 208, 218,
+  5,  12, 21,  32,  45,  58,  74,  93,  104, 123, 144, 164, 179, 196, 210, 223,
+  7,  15, 26,  37,  49,  63,  78,  96,  112, 129, 146, 166, 182, 200, 215, 228,
+  9,  19, 28,  39,  54,  69,  86,  102, 117, 132, 151, 170, 187, 206, 220, 230,
+  13, 24, 35,  46,  60,  73,  91,  108, 122, 137, 154, 174, 189, 207, 224, 235,
+  17, 30, 40,  53,  66,  82,  98,  115, 126, 142, 161, 180, 197, 213, 227, 237,
+  22, 36, 48,  62,  76,  92,  105, 120, 133, 147, 167, 186, 203, 219, 232, 240,
+  27, 44, 56,  70,  84,  99,  113, 127, 140, 156, 175, 193, 209, 226, 236, 244,
+  33, 51, 68,  79,  94,  110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247,
+  42, 61, 77,  90,  106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251,
+  50, 72, 87,  100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253,
+  57, 80, 97,  111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254,
+  65, 88, 107, 124, 139, 152, 163, 177, 185, 199, 221, 234, 243, 248, 252, 255,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_row_iscan_16x16[256]) = {
+  0,   1,   2,   4,   6,   9,   12,  17,  22,  29,  36,  43,  54,  64,  76,
+  86,  3,   5,   7,   11,  15,  19,  25,  32,  38,  48,  59,  68,  84,  99,
+  115, 130, 8,   10,  13,  18,  23,  27,  33,  42,  51,  60,  72,  88,  103,
+  119, 142, 167, 14,  16,  20,  26,  31,  37,  44,  53,  61,  73,  85,  100,
+  116, 135, 161, 185, 21,  24,  30,  35,  40,  47,  55,  65,  74,  81,  94,
+  112, 133, 154, 179, 205, 28,  34,  39,  45,  50,  58,  67,  77,  87,  96,
+  106, 121, 146, 169, 196, 212, 41,  46,  49,  56,  63,  70,  79,  90,  98,
+  107, 122, 138, 159, 182, 207, 222, 52,  57,  62,  69,  75,  83,  93,  102,
+  110, 120, 134, 150, 176, 195, 215, 226, 66,  71,  78,  82,  91,  97,  108,
+  113, 127, 136, 148, 168, 188, 202, 221, 232, 80,  89,  92,  101, 105, 114,
+  125, 131, 139, 151, 162, 177, 192, 208, 223, 234, 95,  104, 109, 117, 123,
+  128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, 111, 118, 124, 129,
+  140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, 243, 126, 132, 137,
+  145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, 244, 246, 141, 149,
+  156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, 242, 249, 251, 152,
+  163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, 245, 247, 252, 253,
+  158, 173, 187, 194, 198, 209, 213, 217, 225, 229, 235, 241, 248, 250, 254,
+  255,
+};
+
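+// The square default scans (16x16 here, 32x32 below) are not plain
+// anti-diagonals like the rectangular sizes; the order is an irregular,
+// presumably tuned, zigzag variant.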
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_16x16[256]) = {
+  0,   2,   5,   9,   17,  24,  36,  44,  55,  72,  88,  104, 128, 143, 166,
+  179, 1,   4,   8,   13,  20,  30,  40,  54,  66,  79,  96,  113, 141, 154,
+  178, 196, 3,   7,   11,  18,  25,  33,  46,  57,  71,  86,  101, 119, 148,
+  164, 186, 201, 6,   12,  16,  23,  31,  39,  53,  64,  78,  92,  110, 127,
+  153, 169, 193, 208, 10,  14,  19,  28,  37,  47,  58,  67,  84,  98,  114,
+  133, 161, 176, 198, 214, 15,  21,  26,  34,  43,  52,  65,  77,  91,  106,
+  120, 140, 165, 185, 205, 221, 22,  27,  32,  41,  48,  60,  73,  85,  99,
+  116, 130, 151, 175, 190, 211, 225, 29,  35,  42,  49,  59,  69,  81,  95,
+  108, 125, 139, 155, 182, 197, 217, 229, 38,  45,  51,  61,  68,  80,  93,
+  105, 118, 134, 150, 168, 191, 207, 223, 234, 50,  56,  63,  74,  83,  94,
+  109, 117, 129, 147, 163, 177, 199, 213, 228, 238, 62,  70,  76,  87,  97,
+  107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, 75,  82,  90,  102,
+  112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, 89,  100, 111,
+  123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, 103, 115,
+  126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, 252, 121,
+  135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, 251, 254,
+  137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, 249, 253,
+  255,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_32x32[1024]) = {
+  0,   32,   64,  96,   128, 160,  192, 224,  256, 288,  320, 352,  384, 416,
+  448, 480,  512, 544,  576, 608,  640, 672,  704, 736,  768, 800,  832, 864,
+  896, 928,  960, 992,  1,   33,   65,  97,   129, 161,  193, 225,  257, 289,
+  321, 353,  385, 417,  449, 481,  513, 545,  577, 609,  641, 673,  705, 737,
+  769, 801,  833, 865,  897, 929,  961, 993,  2,   34,   66,  98,   130, 162,
+  194, 226,  258, 290,  322, 354,  386, 418,  450, 482,  514, 546,  578, 610,
+  642, 674,  706, 738,  770, 802,  834, 866,  898, 930,  962, 994,  3,   35,
+  67,  99,   131, 163,  195, 227,  259, 291,  323, 355,  387, 419,  451, 483,
+  515, 547,  579, 611,  643, 675,  707, 739,  771, 803,  835, 867,  899, 931,
+  963, 995,  4,   36,   68,  100,  132, 164,  196, 228,  260, 292,  324, 356,
+  388, 420,  452, 484,  516, 548,  580, 612,  644, 676,  708, 740,  772, 804,
+  836, 868,  900, 932,  964, 996,  5,   37,   69,  101,  133, 165,  197, 229,
+  261, 293,  325, 357,  389, 421,  453, 485,  517, 549,  581, 613,  645, 677,
+  709, 741,  773, 805,  837, 869,  901, 933,  965, 997,  6,   38,   70,  102,
+  134, 166,  198, 230,  262, 294,  326, 358,  390, 422,  454, 486,  518, 550,
+  582, 614,  646, 678,  710, 742,  774, 806,  838, 870,  902, 934,  966, 998,
+  7,   39,   71,  103,  135, 167,  199, 231,  263, 295,  327, 359,  391, 423,
+  455, 487,  519, 551,  583, 615,  647, 679,  711, 743,  775, 807,  839, 871,
+  903, 935,  967, 999,  8,   40,   72,  104,  136, 168,  200, 232,  264, 296,
+  328, 360,  392, 424,  456, 488,  520, 552,  584, 616,  648, 680,  712, 744,
+  776, 808,  840, 872,  904, 936,  968, 1000, 9,   41,   73,  105,  137, 169,
+  201, 233,  265, 297,  329, 361,  393, 425,  457, 489,  521, 553,  585, 617,
+  649, 681,  713, 745,  777, 809,  841, 873,  905, 937,  969, 1001, 10,  42,
+  74,  106,  138, 170,  202, 234,  266, 298,  330, 362,  394, 426,  458, 490,
+  522, 554,  586, 618,  650, 682,  714, 746,  778, 810,  842, 874,  906, 938,
+  970, 1002, 11,  43,   75,  107,  139, 171,  203, 235,  267, 299,  331, 363,
+  395, 427,  459, 491,  523, 555,  587, 619,  651, 683,  715, 747,  779, 811,
+  843, 875,  907, 939,  971, 1003, 12,  44,   76,  108,  140, 172,  204, 236,
+  268, 300,  332, 364,  396, 428,  460, 492,  524, 556,  588, 620,  652, 684,
+  716, 748,  780, 812,  844, 876,  908, 940,  972, 1004, 13,  45,   77,  109,
+  141, 173,  205, 237,  269, 301,  333, 365,  397, 429,  461, 493,  525, 557,
+  589, 621,  653, 685,  717, 749,  781, 813,  845, 877,  909, 941,  973, 1005,
+  14,  46,   78,  110,  142, 174,  206, 238,  270, 302,  334, 366,  398, 430,
+  462, 494,  526, 558,  590, 622,  654, 686,  718, 750,  782, 814,  846, 878,
+  910, 942,  974, 1006, 15,  47,   79,  111,  143, 175,  207, 239,  271, 303,
+  335, 367,  399, 431,  463, 495,  527, 559,  591, 623,  655, 687,  719, 751,
+  783, 815,  847, 879,  911, 943,  975, 1007, 16,  48,   80,  112,  144, 176,
+  208, 240,  272, 304,  336, 368,  400, 432,  464, 496,  528, 560,  592, 624,
+  656, 688,  720, 752,  784, 816,  848, 880,  912, 944,  976, 1008, 17,  49,
+  81,  113,  145, 177,  209, 241,  273, 305,  337, 369,  401, 433,  465, 497,
+  529, 561,  593, 625,  657, 689,  721, 753,  785, 817,  849, 881,  913, 945,
+  977, 1009, 18,  50,   82,  114,  146, 178,  210, 242,  274, 306,  338, 370,
+  402, 434,  466, 498,  530, 562,  594, 626,  658, 690,  722, 754,  786, 818,
+  850, 882,  914, 946,  978, 1010, 19,  51,   83,  115,  147, 179,  211, 243,
+  275, 307,  339, 371,  403, 435,  467, 499,  531, 563,  595, 627,  659, 691,
+  723, 755,  787, 819,  851, 883,  915, 947,  979, 1011, 20,  52,   84,  116,
+  148, 180,  212, 244,  276, 308,  340, 372,  404, 436,  468, 500,  532, 564,
+  596, 628,  660, 692,  724, 756,  788, 820,  852, 884,  916, 948,  980, 1012,
+  21,  53,   85,  117,  149, 181,  213, 245,  277, 309,  341, 373,  405, 437,
+  469, 501,  533, 565,  597, 629,  661, 693,  725, 757,  789, 821,  853, 885,
+  917, 949,  981, 1013, 22,  54,   86,  118,  150, 182,  214, 246,  278, 310,
+  342, 374,  406, 438,  470, 502,  534, 566,  598, 630,  662, 694,  726, 758,
+  790, 822,  854, 886,  918, 950,  982, 1014, 23,  55,   87,  119,  151, 183,
+  215, 247,  279, 311,  343, 375,  407, 439,  471, 503,  535, 567,  599, 631,
+  663, 695,  727, 759,  791, 823,  855, 887,  919, 951,  983, 1015, 24,  56,
+  88,  120,  152, 184,  216, 248,  280, 312,  344, 376,  408, 440,  472, 504,
+  536, 568,  600, 632,  664, 696,  728, 760,  792, 824,  856, 888,  920, 952,
+  984, 1016, 25,  57,   89,  121,  153, 185,  217, 249,  281, 313,  345, 377,
+  409, 441,  473, 505,  537, 569,  601, 633,  665, 697,  729, 761,  793, 825,
+  857, 889,  921, 953,  985, 1017, 26,  58,   90,  122,  154, 186,  218, 250,
+  282, 314,  346, 378,  410, 442,  474, 506,  538, 570,  602, 634,  666, 698,
+  730, 762,  794, 826,  858, 890,  922, 954,  986, 1018, 27,  59,   91,  123,
+  155, 187,  219, 251,  283, 315,  347, 379,  411, 443,  475, 507,  539, 571,
+  603, 635,  667, 699,  731, 763,  795, 827,  859, 891,  923, 955,  987, 1019,
+  28,  60,   92,  124,  156, 188,  220, 252,  284, 316,  348, 380,  412, 444,
+  476, 508,  540, 572,  604, 636,  668, 700,  732, 764,  796, 828,  860, 892,
+  924, 956,  988, 1020, 29,  61,   93,  125,  157, 189,  221, 253,  285, 317,
+  349, 381,  413, 445,  477, 509,  541, 573,  605, 637,  669, 701,  733, 765,
+  797, 829,  861, 893,  925, 957,  989, 1021, 30,  62,   94,  126,  158, 190,
+  222, 254,  286, 318,  350, 382,  414, 446,  478, 510,  542, 574,  606, 638,
+  670, 702,  734, 766,  798, 830,  862, 894,  926, 958,  990, 1022, 31,  63,
+  95,  127,  159, 191,  223, 255,  287, 319,  351, 383,  415, 447,  479, 511,
+  543, 575,  607, 639,  671, 703,  735, 767,  799, 831,  863, 895,  927, 959,
+  991, 1023,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_32x32[1024]) = {
+  0,    1,    2,    3,    4,    5,    6,    7,    8,    9,    10,   11,   12,
+  13,   14,   15,   16,   17,   18,   19,   20,   21,   22,   23,   24,   25,
+  26,   27,   28,   29,   30,   31,   32,   33,   34,   35,   36,   37,   38,
+  39,   40,   41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51,
+  52,   53,   54,   55,   56,   57,   58,   59,   60,   61,   62,   63,   64,
+  65,   66,   67,   68,   69,   70,   71,   72,   73,   74,   75,   76,   77,
+  78,   79,   80,   81,   82,   83,   84,   85,   86,   87,   88,   89,   90,
+  91,   92,   93,   94,   95,   96,   97,   98,   99,   100,  101,  102,  103,
+  104,  105,  106,  107,  108,  109,  110,  111,  112,  113,  114,  115,  116,
+  117,  118,  119,  120,  121,  122,  123,  124,  125,  126,  127,  128,  129,
+  130,  131,  132,  133,  134,  135,  136,  137,  138,  139,  140,  141,  142,
+  143,  144,  145,  146,  147,  148,  149,  150,  151,  152,  153,  154,  155,
+  156,  157,  158,  159,  160,  161,  162,  163,  164,  165,  166,  167,  168,
+  169,  170,  171,  172,  173,  174,  175,  176,  177,  178,  179,  180,  181,
+  182,  183,  184,  185,  186,  187,  188,  189,  190,  191,  192,  193,  194,
+  195,  196,  197,  198,  199,  200,  201,  202,  203,  204,  205,  206,  207,
+  208,  209,  210,  211,  212,  213,  214,  215,  216,  217,  218,  219,  220,
+  221,  222,  223,  224,  225,  226,  227,  228,  229,  230,  231,  232,  233,
+  234,  235,  236,  237,  238,  239,  240,  241,  242,  243,  244,  245,  246,
+  247,  248,  249,  250,  251,  252,  253,  254,  255,  256,  257,  258,  259,
+  260,  261,  262,  263,  264,  265,  266,  267,  268,  269,  270,  271,  272,
+  273,  274,  275,  276,  277,  278,  279,  280,  281,  282,  283,  284,  285,
+  286,  287,  288,  289,  290,  291,  292,  293,  294,  295,  296,  297,  298,
+  299,  300,  301,  302,  303,  304,  305,  306,  307,  308,  309,  310,  311,
+  312,  313,  314,  315,  316,  317,  318,  319,  320,  321,  322,  323,  324,
+  325,  326,  327,  328,  329,  330,  331,  332,  333,  334,  335,  336,  337,
+  338,  339,  340,  341,  342,  343,  344,  345,  346,  347,  348,  349,  350,
+  351,  352,  353,  354,  355,  356,  357,  358,  359,  360,  361,  362,  363,
+  364,  365,  366,  367,  368,  369,  370,  371,  372,  373,  374,  375,  376,
+  377,  378,  379,  380,  381,  382,  383,  384,  385,  386,  387,  388,  389,
+  390,  391,  392,  393,  394,  395,  396,  397,  398,  399,  400,  401,  402,
+  403,  404,  405,  406,  407,  408,  409,  410,  411,  412,  413,  414,  415,
+  416,  417,  418,  419,  420,  421,  422,  423,  424,  425,  426,  427,  428,
+  429,  430,  431,  432,  433,  434,  435,  436,  437,  438,  439,  440,  441,
+  442,  443,  444,  445,  446,  447,  448,  449,  450,  451,  452,  453,  454,
+  455,  456,  457,  458,  459,  460,  461,  462,  463,  464,  465,  466,  467,
+  468,  469,  470,  471,  472,  473,  474,  475,  476,  477,  478,  479,  480,
+  481,  482,  483,  484,  485,  486,  487,  488,  489,  490,  491,  492,  493,
+  494,  495,  496,  497,  498,  499,  500,  501,  502,  503,  504,  505,  506,
+  507,  508,  509,  510,  511,  512,  513,  514,  515,  516,  517,  518,  519,
+  520,  521,  522,  523,  524,  525,  526,  527,  528,  529,  530,  531,  532,
+  533,  534,  535,  536,  537,  538,  539,  540,  541,  542,  543,  544,  545,
+  546,  547,  548,  549,  550,  551,  552,  553,  554,  555,  556,  557,  558,
+  559,  560,  561,  562,  563,  564,  565,  566,  567,  568,  569,  570,  571,
+  572,  573,  574,  575,  576,  577,  578,  579,  580,  581,  582,  583,  584,
+  585,  586,  587,  588,  589,  590,  591,  592,  593,  594,  595,  596,  597,
+  598,  599,  600,  601,  602,  603,  604,  605,  606,  607,  608,  609,  610,
+  611,  612,  613,  614,  615,  616,  617,  618,  619,  620,  621,  622,  623,
+  624,  625,  626,  627,  628,  629,  630,  631,  632,  633,  634,  635,  636,
+  637,  638,  639,  640,  641,  642,  643,  644,  645,  646,  647,  648,  649,
+  650,  651,  652,  653,  654,  655,  656,  657,  658,  659,  660,  661,  662,
+  663,  664,  665,  666,  667,  668,  669,  670,  671,  672,  673,  674,  675,
+  676,  677,  678,  679,  680,  681,  682,  683,  684,  685,  686,  687,  688,
+  689,  690,  691,  692,  693,  694,  695,  696,  697,  698,  699,  700,  701,
+  702,  703,  704,  705,  706,  707,  708,  709,  710,  711,  712,  713,  714,
+  715,  716,  717,  718,  719,  720,  721,  722,  723,  724,  725,  726,  727,
+  728,  729,  730,  731,  732,  733,  734,  735,  736,  737,  738,  739,  740,
+  741,  742,  743,  744,  745,  746,  747,  748,  749,  750,  751,  752,  753,
+  754,  755,  756,  757,  758,  759,  760,  761,  762,  763,  764,  765,  766,
+  767,  768,  769,  770,  771,  772,  773,  774,  775,  776,  777,  778,  779,
+  780,  781,  782,  783,  784,  785,  786,  787,  788,  789,  790,  791,  792,
+  793,  794,  795,  796,  797,  798,  799,  800,  801,  802,  803,  804,  805,
+  806,  807,  808,  809,  810,  811,  812,  813,  814,  815,  816,  817,  818,
+  819,  820,  821,  822,  823,  824,  825,  826,  827,  828,  829,  830,  831,
+  832,  833,  834,  835,  836,  837,  838,  839,  840,  841,  842,  843,  844,
+  845,  846,  847,  848,  849,  850,  851,  852,  853,  854,  855,  856,  857,
+  858,  859,  860,  861,  862,  863,  864,  865,  866,  867,  868,  869,  870,
+  871,  872,  873,  874,  875,  876,  877,  878,  879,  880,  881,  882,  883,
+  884,  885,  886,  887,  888,  889,  890,  891,  892,  893,  894,  895,  896,
+  897,  898,  899,  900,  901,  902,  903,  904,  905,  906,  907,  908,  909,
+  910,  911,  912,  913,  914,  915,  916,  917,  918,  919,  920,  921,  922,
+  923,  924,  925,  926,  927,  928,  929,  930,  931,  932,  933,  934,  935,
+  936,  937,  938,  939,  940,  941,  942,  943,  944,  945,  946,  947,  948,
+  949,  950,  951,  952,  953,  954,  955,  956,  957,  958,  959,  960,  961,
+  962,  963,  964,  965,  966,  967,  968,  969,  970,  971,  972,  973,  974,
+  975,  976,  977,  978,  979,  980,  981,  982,  983,  984,  985,  986,  987,
+  988,  989,  990,  991,  992,  993,  994,  995,  996,  997,  998,  999,  1000,
+  1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013,
+  1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
+};
+#endif  // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_32x32[1024]) = {
+  0,    2,    5,    10,   17,   25,   38,   47,   62,   83,   101,  121,  145,
+  170,  193,  204,  210,  219,  229,  233,  245,  257,  275,  299,  342,  356,
+  377,  405,  455,  471,  495,  527,  1,    4,    8,    15,   22,   30,   45,
+  58,   74,   92,   112,  133,  158,  184,  203,  215,  222,  228,  234,  237,
+  256,  274,  298,  317,  355,  376,  404,  426,  470,  494,  526,  551,  3,
+  7,    12,   18,   28,   36,   52,   64,   82,   102,  118,  142,  164,  189,
+  208,  217,  224,  231,  235,  238,  273,  297,  316,  329,  375,  403,  425,
+  440,  493,  525,  550,  567,  6,    11,   16,   23,   31,   43,   60,   73,
+  90,   109,  126,  150,  173,  196,  211,  220,  226,  232,  236,  239,  296,
+  315,  328,  335,  402,  424,  439,  447,  524,  549,  566,  575,  9,    14,
+  19,   29,   37,   50,   65,   78,   95,   116,  134,  157,  179,  201,  214,
+  223,  244,  255,  272,  295,  341,  354,  374,  401,  454,  469,  492,  523,
+  582,  596,  617,  645,  13,   20,   26,   35,   44,   54,   72,   85,   105,
+  123,  140,  163,  182,  205,  216,  225,  254,  271,  294,  314,  353,  373,
+  400,  423,  468,  491,  522,  548,  595,  616,  644,  666,  21,   27,   33,
+  42,   53,   63,   80,   94,   113,  132,  151,  172,  190,  209,  218,  227,
+  270,  293,  313,  327,  372,  399,  422,  438,  490,  521,  547,  565,  615,
+  643,  665,  680,  24,   32,   39,   48,   57,   71,   88,   104,  120,  139,
+  159,  178,  197,  212,  221,  230,  292,  312,  326,  334,  398,  421,  437,
+  446,  520,  546,  564,  574,  642,  664,  679,  687,  34,   40,   46,   56,
+  68,   81,   96,   111,  130,  147,  167,  186,  243,  253,  269,  291,  340,
+  352,  371,  397,  453,  467,  489,  519,  581,  594,  614,  641,  693,  705,
+  723,  747,  41,   49,   55,   67,   77,   91,   107,  124,  138,  161,  177,
+  194,  252,  268,  290,  311,  351,  370,  396,  420,  466,  488,  518,  545,
+  593,  613,  640,  663,  704,  722,  746,  765,  51,   59,   66,   76,   89,
+  99,   119,  131,  149,  168,  181,  200,  267,  289,  310,  325,  369,  395,
+  419,  436,  487,  517,  544,  563,  612,  639,  662,  678,  721,  745,  764,
+  777,  61,   69,   75,   87,   100,  114,  129,  144,  162,  180,  191,  207,
+  288,  309,  324,  333,  394,  418,  435,  445,  516,  543,  562,  573,  638,
+  661,  677,  686,  744,  763,  776,  783,  70,   79,   86,   97,   108,  122,
+  137,  155,  242,  251,  266,  287,  339,  350,  368,  393,  452,  465,  486,
+  515,  580,  592,  611,  637,  692,  703,  720,  743,  788,  798,  813,  833,
+  84,   93,   103,  110,  125,  141,  154,  171,  250,  265,  286,  308,  349,
+  367,  392,  417,  464,  485,  514,  542,  591,  610,  636,  660,  702,  719,
+  742,  762,  797,  812,  832,  848,  98,   106,  115,  127,  143,  156,  169,
+  185,  264,  285,  307,  323,  366,  391,  416,  434,  484,  513,  541,  561,
+  609,  635,  659,  676,  718,  741,  761,  775,  811,  831,  847,  858,  117,
+  128,  136,  148,  160,  175,  188,  198,  284,  306,  322,  332,  390,  415,
+  433,  444,  512,  540,  560,  572,  634,  658,  675,  685,  740,  760,  774,
+  782,  830,  846,  857,  863,  135,  146,  152,  165,  241,  249,  263,  283,
+  338,  348,  365,  389,  451,  463,  483,  511,  579,  590,  608,  633,  691,
+  701,  717,  739,  787,  796,  810,  829,  867,  875,  887,  903,  153,  166,
+  174,  183,  248,  262,  282,  305,  347,  364,  388,  414,  462,  482,  510,
+  539,  589,  607,  632,  657,  700,  716,  738,  759,  795,  809,  828,  845,
+  874,  886,  902,  915,  176,  187,  195,  202,  261,  281,  304,  321,  363,
+  387,  413,  432,  481,  509,  538,  559,  606,  631,  656,  674,  715,  737,
+  758,  773,  808,  827,  844,  856,  885,  901,  914,  923,  192,  199,  206,
+  213,  280,  303,  320,  331,  386,  412,  431,  443,  508,  537,  558,  571,
+  630,  655,  673,  684,  736,  757,  772,  781,  826,  843,  855,  862,  900,
+  913,  922,  927,  240,  247,  260,  279,  337,  346,  362,  385,  450,  461,
+  480,  507,  578,  588,  605,  629,  690,  699,  714,  735,  786,  794,  807,
+  825,  866,  873,  884,  899,  930,  936,  945,  957,  246,  259,  278,  302,
+  345,  361,  384,  411,  460,  479,  506,  536,  587,  604,  628,  654,  698,
+  713,  734,  756,  793,  806,  824,  842,  872,  883,  898,  912,  935,  944,
+  956,  966,  258,  277,  301,  319,  360,  383,  410,  430,  478,  505,  535,
+  557,  603,  627,  653,  672,  712,  733,  755,  771,  805,  823,  841,  854,
+  882,  897,  911,  921,  943,  955,  965,  972,  276,  300,  318,  330,  382,
+  409,  429,  442,  504,  534,  556,  570,  626,  652,  671,  683,  732,  754,
+  770,  780,  822,  840,  853,  861,  896,  910,  920,  926,  954,  964,  971,
+  975,  336,  344,  359,  381,  449,  459,  477,  503,  577,  586,  602,  625,
+  689,  697,  711,  731,  785,  792,  804,  821,  865,  871,  881,  895,  929,
+  934,  942,  953,  977,  981,  987,  995,  343,  358,  380,  408,  458,  476,
+  502,  533,  585,  601,  624,  651,  696,  710,  730,  753,  791,  803,  820,
+  839,  870,  880,  894,  909,  933,  941,  952,  963,  980,  986,  994,  1001,
+  357,  379,  407,  428,  475,  501,  532,  555,  600,  623,  650,  670,  709,
+  729,  752,  769,  802,  819,  838,  852,  879,  893,  908,  919,  940,  951,
+  962,  970,  985,  993,  1000, 1005, 378,  406,  427,  441,  500,  531,  554,
+  569,  622,  649,  669,  682,  728,  751,  768,  779,  818,  837,  851,  860,
+  892,  907,  918,  925,  950,  961,  969,  974,  992,  999,  1004, 1007, 448,
+  457,  474,  499,  576,  584,  599,  621,  688,  695,  708,  727,  784,  790,
+  801,  817,  864,  869,  878,  891,  928,  932,  939,  949,  976,  979,  984,
+  991,  1008, 1010, 1013, 1017, 456,  473,  498,  530,  583,  598,  620,  648,
+  694,  707,  726,  750,  789,  800,  816,  836,  868,  877,  890,  906,  931,
+  938,  948,  960,  978,  983,  990,  998,  1009, 1012, 1016, 1020, 472,  497,
+  529,  553,  597,  619,  647,  668,  706,  725,  749,  767,  799,  815,  835,
+  850,  876,  889,  905,  917,  937,  947,  959,  968,  982,  989,  997,  1003,
+  1011, 1015, 1019, 1022, 496,  528,  552,  568,  618,  646,  667,  681,  724,
+  748,  766,  778,  814,  834,  849,  859,  888,  904,  916,  924,  946,  958,
+  967,  973,  988,  996,  1002, 1006, 1014, 1018, 1021, 1023,
+};
+
+#if CONFIG_EXT_TX
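+// Reading the data: the v2/h2/qtr tables scan one sub-region of the 32x32
+// block completely before the rest. v2 visits the left 16 columns first
+// (scan positions 0-511), h2 the top 16 rows first, and qtr the top-left
+// 16x16 quarter first (positions 0-255).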
+DECLARE_ALIGNED(16, static const int16_t, vp10_v2_iscan_32x32[1024]) = {
+  0,    1,    4,    9,    15,   22,   33,   43,   56,   71,   86,   104,  121,
+  142,  166,  189,  512,  518,  527,  539,  551,  566,  584,  602,  621,  644,
+  668,  695,  721,  748,  780,  811,  2,    3,    6,    11,   17,   26,   35,
+  45,   58,   73,   90,   106,  123,  146,  168,  193,  513,  519,  528,  540,
+  553,  567,  585,  603,  622,  647,  670,  696,  722,  751,  783,  812,  5,
+  7,    8,    13,   20,   28,   37,   50,   62,   75,   92,   108,  129,  150,
+  170,  195,  514,  521,  530,  541,  554,  569,  587,  605,  625,  649,  671,
+  699,  725,  752,  785,  815,  10,   12,   14,   19,   23,   31,   41,   52,
+  65,   81,   96,   113,  133,  152,  175,  201,  515,  522,  531,  542,  556,
+  572,  589,  607,  629,  651,  673,  700,  726,  757,  788,  819,  16,   18,
+  21,   24,   30,   39,   48,   59,   69,   83,   100,  119,  137,  158,  181,
+  203,  516,  523,  534,  545,  559,  574,  591,  610,  632,  654,  679,  704,
+  730,  762,  791,  824,  25,   27,   29,   32,   40,   46,   54,   67,   79,
+  94,   109,  127,  143,  164,  185,  210,  517,  525,  535,  547,  561,  578,
+  595,  615,  635,  656,  684,  707,  737,  766,  793,  830,  34,   36,   38,
+  42,   49,   55,   64,   76,   87,   102,  117,  135,  154,  176,  197,  219,
+  520,  529,  538,  550,  565,  580,  598,  618,  639,  664,  687,  712,  741,
+  769,  802,  833,  44,   47,   51,   53,   60,   68,   77,   85,   98,   114,
+  131,  147,  162,  183,  208,  227,  524,  533,  544,  557,  571,  588,  606,
+  623,  645,  667,  692,  720,  747,  776,  806,  838,  57,   61,   63,   66,
+  70,   80,   88,   99,   112,  124,  140,  159,  179,  199,  216,  233,  526,
+  536,  548,  562,  577,  593,  613,  633,  653,  676,  701,  727,  756,  786,
+  814,  847,  72,   74,   78,   82,   84,   95,   103,  115,  125,  139,  156,
+  173,  190,  211,  229,  246,  532,  543,  555,  568,  581,  601,  619,  637,
+  663,  685,  709,  738,  763,  792,  826,  855,  89,   91,   93,   97,   101,
+  110,  118,  132,  141,  157,  171,  186,  206,  224,  241,  255,  537,  549,
+  560,  576,  592,  608,  628,  650,  669,  693,  719,  744,  773,  805,  834,
+  862,  105,  107,  111,  116,  120,  128,  136,  148,  160,  174,  187,  205,
+  221,  236,  251,  267,  546,  558,  570,  583,  600,  617,  636,  657,  680,
+  706,  729,  758,  787,  813,  846,  871,  122,  126,  130,  134,  138,  144,
+  155,  163,  180,  191,  207,  222,  232,  248,  264,  278,  552,  564,  579,
+  594,  609,  630,  648,  666,  688,  715,  742,  768,  797,  827,  856,  877,
+  145,  149,  151,  153,  161,  165,  177,  184,  200,  212,  225,  237,  249,
+  262,  275,  289,  563,  575,  590,  604,  620,  638,  660,  683,  705,  728,
+  753,  779,  809,  839,  866,  889,  167,  169,  172,  178,  182,  188,  198,
+  209,  217,  230,  242,  252,  265,  276,  288,  301,  573,  586,  599,  616,
+  634,  652,  672,  694,  716,  743,  767,  794,  825,  850,  874,  899,  192,
+  194,  196,  202,  204,  213,  220,  228,  234,  247,  256,  268,  279,  290,
+  302,  315,  582,  597,  614,  631,  646,  665,  686,  708,  732,  759,  784,
+  810,  837,  863,  886,  908,  214,  215,  218,  223,  226,  231,  239,  244,
+  253,  261,  271,  283,  292,  304,  317,  325,  596,  611,  626,  642,  661,
+  681,  702,  723,  745,  770,  800,  828,  853,  875,  897,  919,  235,  238,
+  240,  243,  245,  250,  257,  263,  270,  280,  287,  298,  307,  319,  329,
+  340,  612,  624,  640,  658,  677,  697,  717,  739,  764,  789,  816,  844,
+  867,  890,  909,  927,  254,  258,  259,  260,  266,  269,  272,  282,  286,
+  296,  303,  312,  323,  333,  341,  355,  627,  641,  655,  674,  690,  713,
+  735,  760,  781,  807,  835,  857,  880,  902,  921,  940,  273,  274,  277,
+  281,  284,  285,  291,  299,  305,  310,  320,  327,  337,  346,  357,  369,
+  643,  659,  675,  689,  710,  733,  754,  777,  803,  831,  851,  872,  892,
+  913,  934,  950,  293,  294,  295,  297,  300,  306,  308,  314,  321,  326,
+  335,  343,  352,  361,  372,  378,  662,  678,  691,  711,  731,  749,  774,
+  798,  822,  848,  869,  887,  906,  925,  942,  961,  309,  311,  313,  316,
+  318,  322,  324,  332,  338,  344,  351,  358,  367,  375,  386,  394,  682,
+  698,  714,  734,  750,  772,  795,  820,  842,  864,  884,  904,  923,  938,
+  954,  967,  328,  330,  331,  334,  336,  339,  342,  348,  354,  359,  366,
+  374,  382,  391,  400,  409,  703,  718,  736,  755,  775,  796,  818,  840,
+  860,  882,  900,  917,  936,  952,  965,  977,  345,  347,  349,  350,  353,
+  356,  360,  364,  371,  376,  383,  389,  395,  406,  412,  423,  724,  740,
+  761,  778,  799,  821,  841,  859,  878,  895,  915,  932,  948,  963,  975,
+  986,  362,  363,  365,  368,  370,  373,  377,  379,  387,  392,  397,  405,
+  411,  420,  428,  439,  746,  765,  782,  804,  823,  843,  861,  879,  894,
+  911,  930,  946,  959,  973,  984,  994,  380,  381,  384,  385,  388,  390,
+  393,  396,  403,  408,  413,  422,  427,  436,  444,  452,  771,  790,  808,
+  832,  849,  865,  883,  896,  912,  928,  944,  957,  971,  982,  992,  1001,
+  398,  399,  401,  402,  404,  407,  410,  414,  419,  425,  429,  437,  442,
+  449,  458,  465,  801,  817,  836,  852,  870,  885,  901,  916,  931,  945,
+  956,  969,  980,  990,  999,  1007, 415,  416,  417,  418,  421,  424,  426,
+  430,  434,  441,  445,  453,  459,  463,  473,  480,  829,  845,  858,  873,
+  888,  905,  918,  933,  947,  958,  970,  979,  988,  997,  1005, 1012, 431,
+  432,  433,  435,  438,  440,  443,  446,  451,  456,  461,  468,  475,  479,
+  488,  494,  854,  868,  881,  893,  907,  924,  937,  949,  960,  972,  981,
+  989,  996,  1003, 1010, 1016, 447,  448,  450,  454,  455,  457,  460,  462,
+  469,  472,  477,  482,  490,  495,  499,  503,  876,  891,  903,  914,  926,
+  939,  953,  964,  974,  983,  991,  998,  1004, 1009, 1014, 1019, 464,  466,
+  467,  470,  471,  474,  476,  478,  484,  489,  493,  497,  501,  504,  506,
+  508,  898,  910,  922,  935,  943,  955,  966,  976,  985,  993,  1000, 1006,
+  1011, 1015, 1018, 1021, 481,  483,  485,  486,  487,  491,  492,  496,  498,
+  500,  502,  505,  507,  509,  510,  511,  920,  929,  941,  951,  962,  968,
+  978,  987,  995,  1002, 1008, 1013, 1017, 1020, 1022, 1023,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_h2_iscan_32x32[1024]) = {
+  0,    1,    4,    9,    15,   22,   33,   43,   56,   71,   86,   104,  121,
+  142,  166,  189,  214,  233,  254,  273,  292,  309,  328,  345,  362,  378,
+  397,  415,  431,  447,  464,  481,  2,    3,    6,    11,   17,   26,   35,
+  45,   58,   73,   90,   106,  123,  146,  168,  193,  215,  236,  255,  274,
+  294,  310,  329,  346,  363,  381,  399,  416,  432,  448,  465,  482,  5,
+  7,    8,    13,   20,   28,   37,   50,   62,   75,   92,   108,  129,  150,
+  170,  195,  216,  240,  259,  275,  295,  312,  331,  348,  365,  383,  400,
+  417,  433,  449,  467,  485,  10,   12,   14,   19,   23,   31,   41,   52,
+  65,   81,   96,   113,  133,  152,  175,  201,  221,  243,  260,  280,  297,
+  315,  333,  350,  367,  385,  402,  418,  434,  452,  470,  486,  16,   18,
+  21,   24,   30,   39,   48,   59,   69,   83,   100,  119,  137,  158,  181,
+  203,  226,  244,  264,  283,  300,  318,  335,  353,  370,  388,  404,  420,
+  438,  455,  471,  487,  25,   27,   29,   32,   40,   46,   54,   67,   79,
+  94,   109,  127,  143,  164,  185,  210,  231,  250,  269,  285,  304,  322,
+  339,  356,  373,  389,  407,  423,  440,  457,  473,  491,  34,   36,   38,
+  42,   49,   55,   64,   76,   87,   102,  117,  135,  154,  176,  197,  219,
+  239,  256,  272,  291,  308,  324,  341,  359,  377,  393,  410,  426,  442,
+  460,  476,  492,  44,   47,   51,   53,   60,   68,   77,   85,   98,   114,
+  131,  147,  162,  183,  208,  227,  245,  262,  282,  298,  314,  332,  349,
+  364,  379,  396,  412,  430,  446,  462,  478,  495,  57,   61,   63,   66,
+  70,   80,   88,   99,   112,  124,  140,  159,  179,  199,  217,  234,  253,
+  270,  286,  305,  321,  337,  354,  371,  387,  403,  419,  435,  451,  468,
+  484,  498,  72,   74,   78,   82,   84,   95,   103,  115,  125,  139,  156,
+  173,  190,  211,  229,  246,  261,  281,  296,  311,  325,  344,  360,  375,
+  392,  408,  425,  441,  456,  472,  489,  500,  89,   91,   93,   97,   101,
+  110,  118,  132,  141,  157,  171,  186,  206,  224,  241,  257,  271,  287,
+  303,  320,  336,  351,  366,  384,  398,  413,  429,  445,  461,  477,  493,
+  502,  105,  107,  111,  116,  120,  128,  136,  148,  160,  174,  187,  205,
+  222,  237,  251,  267,  284,  299,  313,  327,  343,  358,  374,  390,  405,
+  422,  437,  453,  469,  483,  497,  505,  122,  126,  130,  134,  138,  144,
+  155,  163,  180,  191,  207,  223,  232,  248,  265,  278,  293,  307,  323,
+  338,  352,  368,  382,  395,  411,  427,  443,  459,  475,  490,  501,  507,
+  145,  149,  151,  153,  161,  165,  177,  184,  200,  212,  225,  238,  249,
+  263,  276,  289,  306,  319,  334,  347,  361,  376,  391,  406,  421,  436,
+  450,  463,  479,  496,  504,  509,  167,  169,  172,  178,  182,  188,  198,
+  209,  218,  230,  242,  252,  266,  277,  288,  301,  317,  330,  342,  357,
+  372,  386,  401,  414,  428,  444,  458,  474,  488,  499,  506,  510,  192,
+  194,  196,  202,  204,  213,  220,  228,  235,  247,  258,  268,  279,  290,
+  302,  316,  326,  340,  355,  369,  380,  394,  409,  424,  439,  454,  466,
+  480,  494,  503,  508,  511,  512,  513,  514,  515,  516,  517,  520,  523,
+  526,  532,  537,  545,  551,  561,  573,  581,  596,  610,  625,  642,  661,
+  680,  701,  722,  745,  770,  800,  827,  853,  875,  897,  919,  518,  519,
+  521,  522,  524,  525,  528,  533,  536,  542,  549,  557,  564,  575,  585,
+  597,  611,  623,  640,  656,  676,  696,  717,  739,  763,  789,  815,  844,
+  867,  889,  909,  927,  527,  529,  530,  531,  534,  535,  538,  544,  548,
+  555,  560,  569,  579,  589,  598,  614,  626,  641,  655,  673,  690,  712,
+  735,  760,  780,  806,  834,  857,  880,  902,  921,  940,  539,  540,  541,
+  543,  546,  547,  550,  558,  562,  567,  576,  583,  593,  603,  616,  631,
+  643,  657,  674,  689,  710,  733,  752,  776,  803,  830,  850,  872,  892,
+  913,  934,  950,  552,  553,  554,  556,  559,  563,  565,  571,  577,  582,
+  591,  600,  609,  620,  634,  644,  662,  677,  691,  711,  730,  748,  773,
+  798,  822,  847,  869,  887,  906,  925,  942,  961,  566,  568,  570,  572,
+  574,  578,  580,  588,  594,  601,  608,  617,  629,  637,  652,  665,  681,
+  697,  713,  734,  749,  772,  793,  819,  842,  863,  884,  904,  923,  938,
+  954,  967,  584,  586,  587,  590,  592,  595,  599,  605,  613,  618,  628,
+  636,  648,  660,  671,  686,  702,  718,  736,  753,  774,  794,  818,  840,
+  860,  882,  900,  917,  936,  952,  965,  977,  602,  604,  606,  607,  612,
+  615,  619,  624,  633,  638,  649,  658,  666,  683,  692,  707,  723,  740,
+  761,  777,  799,  820,  841,  859,  877,  895,  915,  932,  948,  963,  975,
+  986,  621,  622,  627,  630,  632,  635,  639,  645,  653,  663,  668,  682,
+  688,  704,  716,  732,  746,  764,  781,  804,  823,  843,  861,  878,  894,
+  911,  930,  946,  959,  973,  984,  994,  646,  647,  650,  651,  654,  659,
+  664,  667,  678,  685,  693,  706,  715,  728,  743,  757,  771,  790,  807,
+  831,  848,  864,  883,  896,  912,  928,  944,  957,  971,  982,  992,  1001,
+  669,  670,  672,  675,  679,  684,  687,  694,  703,  709,  719,  729,  741,
+  754,  767,  783,  801,  816,  835,  851,  870,  885,  901,  916,  931,  945,
+  956,  969,  980,  990,  999,  1007, 695,  698,  699,  700,  705,  708,  714,
+  720,  726,  738,  744,  758,  768,  779,  795,  810,  828,  845,  858,  873,
+  888,  905,  918,  933,  947,  958,  970,  979,  988,  997,  1005, 1012, 721,
+  724,  725,  727,  731,  737,  742,  747,  756,  765,  775,  786,  797,  809,
+  825,  837,  854,  868,  881,  893,  907,  924,  937,  949,  960,  972,  981,
+  989,  996,  1003, 1010, 1016, 750,  751,  755,  759,  762,  766,  769,  778,
+  787,  792,  805,  812,  829,  838,  852,  865,  876,  890,  903,  914,  926,
+  939,  953,  964,  974,  983,  991,  998,  1004, 1009, 1014, 1019, 782,  784,
+  785,  788,  791,  796,  802,  808,  814,  826,  836,  846,  856,  866,  874,
+  886,  898,  910,  922,  935,  943,  955,  966,  976,  985,  993,  1000, 1006,
+  1011, 1015, 1018, 1021, 811,  813,  817,  821,  824,  832,  833,  839,  849,
+  855,  862,  871,  879,  891,  899,  908,  920,  929,  941,  951,  962,  968,
+  978,  987,  995,  1002, 1008, 1013, 1017, 1020, 1022, 1023,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_qtr_iscan_32x32[1024]) = {
+  0,    1,    4,    9,    15,   22,   33,   43,   56,   71,   86,   104,  121,
+  142,  166,  189,  256,  268,  286,  310,  334,  364,  400,  435,  471,  510,
+  553,  598,  640,  683,  732,  780,  2,    3,    6,    11,   17,   26,   35,
+  45,   58,   73,   90,   106,  123,  146,  168,  193,  258,  270,  288,  312,
+  338,  366,  402,  437,  473,  516,  557,  600,  642,  687,  736,  782,  5,
+  7,    8,    13,   20,   28,   37,   50,   62,   75,   92,   108,  129,  150,
+  170,  195,  260,  274,  292,  314,  340,  370,  406,  441,  478,  520,  559,
+  604,  646,  689,  740,  788,  10,   12,   14,   19,   23,   31,   41,   52,
+  65,   81,   96,   113,  133,  152,  175,  201,  262,  276,  294,  316,  344,
+  376,  410,  445,  484,  524,  563,  606,  648,  697,  746,  793,  16,   18,
+  21,   24,   30,   39,   48,   59,   69,   83,   100,  119,  137,  158,  181,
+  203,  264,  278,  300,  322,  350,  380,  414,  451,  490,  530,  571,  612,
+  656,  705,  750,  799,  25,   27,   29,   32,   40,   46,   54,   67,   79,
+  94,   109,  127,  143,  164,  185,  210,  266,  282,  302,  326,  354,  388,
+  422,  459,  496,  533,  579,  618,  665,  711,  754,  809,  34,   36,   38,
+  42,   49,   55,   64,   76,   87,   102,  117,  135,  154,  176,  197,  216,
+  272,  289,  308,  332,  362,  392,  427,  465,  504,  545,  585,  626,  671,
+  717,  766,  813,  44,   47,   51,   53,   60,   68,   77,   85,   98,   114,
+  131,  147,  162,  183,  208,  222,  279,  298,  320,  346,  374,  408,  442,
+  475,  511,  551,  592,  638,  681,  726,  772,  821,  57,   61,   63,   66,
+  70,   80,   88,   99,   112,  124,  140,  159,  179,  199,  214,  227,  284,
+  304,  328,  355,  386,  418,  455,  492,  528,  567,  608,  649,  695,  742,
+  786,  833,  72,   74,   78,   82,   84,   95,   103,  115,  125,  139,  156,
+  173,  190,  211,  224,  233,  296,  317,  342,  367,  394,  433,  466,  500,
+  543,  581,  622,  667,  707,  752,  803,  843,  89,   91,   93,   97,   101,
+  110,  118,  132,  141,  157,  171,  186,  206,  220,  231,  239,  306,  330,
+  352,  384,  415,  447,  482,  521,  554,  593,  636,  677,  722,  770,  815,
+  852,  105,  107,  111,  116,  120,  128,  136,  148,  160,  174,  187,  205,
+  218,  229,  237,  244,  323,  347,  371,  398,  431,  463,  498,  534,  573,
+  616,  654,  698,  743,  783,  831,  864,  122,  126,  130,  134,  138,  144,
+  155,  163,  180,  191,  207,  219,  226,  235,  242,  248,  335,  360,  390,
+  419,  449,  485,  518,  549,  587,  630,  672,  715,  760,  805,  845,  872,
+  145,  149,  151,  153,  161,  165,  177,  184,  200,  212,  221,  230,  236,
+  241,  246,  251,  356,  382,  411,  438,  469,  501,  539,  577,  613,  652,
+  690,  730,  776,  822,  858,  886,  167,  169,  172,  178,  182,  188,  198,
+  209,  215,  225,  232,  238,  243,  247,  250,  253,  378,  403,  428,  461,
+  494,  526,  560,  594,  632,  675,  713,  755,  801,  837,  868,  897,  192,
+  194,  196,  202,  204,  213,  217,  223,  228,  234,  240,  245,  249,  252,
+  254,  255,  395,  425,  457,  488,  512,  547,  583,  619,  659,  699,  737,
+  778,  819,  854,  882,  907,  257,  259,  261,  263,  265,  267,  273,  280,
+  285,  297,  307,  324,  336,  357,  379,  396,  424,  452,  479,  508,  541,
+  574,  609,  643,  679,  719,  764,  806,  841,  870,  895,  919,  269,  271,
+  275,  277,  281,  283,  290,  299,  305,  318,  331,  348,  361,  383,  404,
+  426,  453,  476,  506,  535,  568,  601,  634,  669,  708,  748,  789,  829,
+  860,  887,  909,  927,  287,  291,  293,  295,  301,  303,  309,  321,  329,
+  343,  353,  372,  391,  412,  429,  458,  480,  507,  532,  564,  590,  627,
+  663,  703,  733,  773,  816,  847,  876,  901,  921,  940,  311,  313,  315,
+  319,  325,  327,  333,  349,  358,  368,  385,  399,  420,  439,  462,  489,
+  509,  536,  565,  589,  624,  661,  691,  727,  768,  810,  838,  866,  890,
+  913,  934,  950,  337,  339,  341,  345,  351,  359,  363,  375,  387,  397,
+  416,  432,  450,  470,  495,  513,  542,  569,  591,  625,  657,  684,  723,
+  762,  797,  834,  862,  884,  905,  925,  942,  961,  365,  369,  373,  377,
+  381,  389,  393,  409,  421,  434,  448,  464,  486,  502,  527,  548,  575,
+  602,  628,  662,  685,  721,  756,  794,  827,  855,  880,  903,  923,  938,
+  954,  967,  401,  405,  407,  413,  417,  423,  430,  443,  456,  467,  483,
+  499,  519,  540,  561,  584,  610,  635,  664,  692,  724,  757,  792,  825,
+  850,  878,  899,  917,  936,  952,  965,  977,  436,  440,  444,  446,  454,
+  460,  468,  477,  493,  503,  522,  537,  550,  578,  595,  620,  644,  670,
+  704,  728,  763,  795,  826,  849,  873,  893,  915,  932,  948,  963,  975,
+  986,  472,  474,  481,  487,  491,  497,  505,  514,  529,  544,  555,  576,
+  588,  614,  633,  660,  680,  709,  734,  769,  798,  828,  851,  874,  892,
+  911,  930,  946,  959,  973,  984,  994,  515,  517,  523,  525,  531,  538,
+  546,  552,  570,  582,  596,  617,  631,  653,  676,  700,  720,  749,  774,
+  811,  835,  856,  879,  894,  912,  928,  944,  957,  971,  982,  992,  1001,
+  556,  558,  562,  566,  572,  580,  586,  597,  611,  623,  637,  655,  673,
+  693,  714,  738,  765,  790,  817,  839,  863,  881,  900,  916,  931,  945,
+  956,  969,  980,  990,  999,  1007, 599,  603,  605,  607,  615,  621,  629,
+  639,  650,  668,  678,  701,  716,  731,  758,  779,  807,  830,  848,  867,
+  885,  904,  918,  933,  947,  958,  970,  979,  988,  997,  1005, 1012, 641,
+  645,  647,  651,  658,  666,  674,  682,  696,  710,  725,  744,  761,  777,
+  802,  820,  842,  861,  877,  891,  906,  924,  937,  949,  960,  972,  981,
+  989,  996,  1003, 1010, 1016, 686,  688,  694,  702,  706,  712,  718,  729,
+  745,  753,  771,  784,  808,  823,  840,  857,  871,  888,  902,  914,  926,
+  939,  953,  964,  974,  983,  991,  998,  1004, 1009, 1014, 1019, 735,  739,
+  741,  747,  751,  759,  767,  775,  787,  804,  818,  832,  846,  859,  869,
+  883,  896,  910,  922,  935,  943,  955,  966,  976,  985,  993,  1000, 1006,
+  1011, 1015, 1018, 1021, 781,  785,  791,  796,  800,  812,  814,  824,  836,
+  844,  853,  865,  875,  889,  898,  908,  920,  929,  941,  951,  962,  968,
+  978,  987,  995,  1002, 1008, 1013, 1017, 1020, 1022, 1023,
+};
+#endif  // CONFIG_EXT_TX
+
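+// Each scan_order entry (see av1/common/scan.h) bundles three tables: the
+// forward scan (coefficient coding order), the inverse scan, and the
+// neighbor table used to derive coefficient contexts during entropy coding.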
+const scan_order vp10_default_scan_orders[TX_SIZES] = {
+  { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+  { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+  { default_scan_16x16, vp10_default_iscan_16x16,
+    default_scan_16x16_neighbors },
+  { default_scan_32x32, vp10_default_iscan_32x32,
+    default_scan_32x32_neighbors },
+};
+
+#if CONFIG_EXT_TX
+const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+  {
+      // TX_4X4
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors },
+      { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+      { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors },
+      { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors },
+      { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors },
+      { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors },
+      { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors },
+      { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors },
+  },
+  {
+      // TX_8X8
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors },
+      { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
+      { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors },
+      { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors },
+      { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors },
+      { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors },
+      { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors },
+      { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors },
+  },
+  {
+      // TX_16X16
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors },
+      { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
+      { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors },
+      { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors },
+      { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors },
+      { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors },
+      { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors },
+      { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors },
+  },
+  {
+      // TX_32X32
+      { default_scan_32x32, vp10_default_iscan_32x32,
+        default_scan_32x32_neighbors },
+      { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors },
+      { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors },
+      { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+      { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors },
+      { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors },
+      { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+      { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+      { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+      { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+      { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+      { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+      { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+      { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+      { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+      { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+  }
+};
+
+const scan_order vp10_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
+  {
+      // TX_4X4
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+      { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+      { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+      { mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+      { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+      { mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+      { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+      { mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+  },
+  {
+      // TX_8X8
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+      { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
+      { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
+      { mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
+      { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
+      { mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
+      { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
+      { mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
+  },
+  {
+      // TX_16X16
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { default_scan_16x16, vp10_default_iscan_16x16,
+        default_scan_16x16_neighbors },
+      { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
+      { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
+      { mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
+      { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
+      { mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
+      { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
+      { mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
+  },
+  {
+      // TX_32X32
+      { default_scan_32x32, vp10_default_iscan_32x32,
+        default_scan_32x32_neighbors },
+      { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors },
+      { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors },
+      { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+      { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors },
+      { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors },
+      { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+      { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+      { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+      { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+      { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+      { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+      { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+      { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+      { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+      { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+  },
+  {
+      // TX_4X8
+      { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+      { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+      { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+      { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+      { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+      { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+      { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+      { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+      { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+      { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+      { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+      { mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
+      { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+      { mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
+      { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+      { mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
+  },
+  {
+      // TX_8X4
+      { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+      { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+      { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+      { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+      { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+      { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+      { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+      { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+      { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+      { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+      { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+      { mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
+      { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+      { mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
+      { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+      { mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
+  },
+  {
+      // TX_8X16
+      { default_scan_8x16, vp10_default_iscan_8x16,
+        default_scan_8x16_neighbors },
+      { default_scan_8x16, vp10_default_iscan_8x16,
+        default_scan_8x16_neighbors },
+      { default_scan_8x16, vp10_default_iscan_8x16,
+        default_scan_8x16_neighbors },
+      { default_scan_8x16, vp10_default_iscan_8x16,
+        default_scan_8x16_neighbors },
+      { default_scan_8x16, vp10_default_iscan_8x16,
+        default_scan_8x16_neighbors },
+      { default_scan_8x16, vp10_default_iscan_8x16,
+        default_scan_8x16_neighbors },
+      { default_scan_8x16, vp10_default_iscan_8x16,
+        default_scan_8x16_neighbors },
+      { default_scan_8x16, vp10_default_iscan_8x16,
+        default_scan_8x16_neighbors },
+      { default_scan_8x16, vp10_default_iscan_8x16,
+        default_scan_8x16_neighbors },
+      { mrow_scan_8x16, vp10_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+      { mrow_scan_8x16, vp10_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+      { mcol_scan_8x16, vp10_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
+      { mrow_scan_8x16, vp10_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+      { mcol_scan_8x16, vp10_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
+      { mrow_scan_8x16, vp10_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+      { mcol_scan_8x16, vp10_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
+  },
+  {
+      // TX_16X8
+      { default_scan_16x8, vp10_default_iscan_16x8,
+        default_scan_16x8_neighbors },
+      { default_scan_16x8, vp10_default_iscan_16x8,
+        default_scan_16x8_neighbors },
+      { default_scan_16x8, vp10_default_iscan_16x8,
+        default_scan_16x8_neighbors },
+      { default_scan_16x8, vp10_default_iscan_16x8,
+        default_scan_16x8_neighbors },
+      { default_scan_16x8, vp10_default_iscan_16x8,
+        default_scan_16x8_neighbors },
+      { default_scan_16x8, vp10_default_iscan_16x8,
+        default_scan_16x8_neighbors },
+      { default_scan_16x8, vp10_default_iscan_16x8,
+        default_scan_16x8_neighbors },
+      { default_scan_16x8, vp10_default_iscan_16x8,
+        default_scan_16x8_neighbors },
+      { default_scan_16x8, vp10_default_iscan_16x8,
+        default_scan_16x8_neighbors },
+      { mrow_scan_16x8, vp10_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+      { mrow_scan_16x8, vp10_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+      { mcol_scan_16x8, vp10_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
+      { mrow_scan_16x8, vp10_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+      { mcol_scan_16x8, vp10_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
+      { mrow_scan_16x8, vp10_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+      { mcol_scan_16x8, vp10_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
+  },
+  {
+      // TX_16X32
+      { default_scan_16x32, vp10_default_iscan_16x32,
+        default_scan_16x32_neighbors },
+      { default_scan_16x32, vp10_default_iscan_16x32,
+        default_scan_16x32_neighbors },
+      { default_scan_16x32, vp10_default_iscan_16x32,
+        default_scan_16x32_neighbors },
+      { default_scan_16x32, vp10_default_iscan_16x32,
+        default_scan_16x32_neighbors },
+      { default_scan_16x32, vp10_default_iscan_16x32,
+        default_scan_16x32_neighbors },
+      { default_scan_16x32, vp10_default_iscan_16x32,
+        default_scan_16x32_neighbors },
+      { default_scan_16x32, vp10_default_iscan_16x32,
+        default_scan_16x32_neighbors },
+      { default_scan_16x32, vp10_default_iscan_16x32,
+        default_scan_16x32_neighbors },
+      { default_scan_16x32, vp10_default_iscan_16x32,
+        default_scan_16x32_neighbors },
+      { mrow_scan_16x32, vp10_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+      { mrow_scan_16x32, vp10_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+      { mcol_scan_16x32, vp10_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+      { mrow_scan_16x32, vp10_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+      { mcol_scan_16x32, vp10_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+      { mrow_scan_16x32, vp10_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+      { mcol_scan_16x32, vp10_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+  },
+  {
+      // TX_32X16
+      { default_scan_32x16, vp10_default_iscan_32x16,
+        default_scan_32x16_neighbors },
+      { default_scan_32x16, vp10_default_iscan_32x16,
+        default_scan_32x16_neighbors },
+      { default_scan_32x16, vp10_default_iscan_32x16,
+        default_scan_32x16_neighbors },
+      { default_scan_32x16, vp10_default_iscan_32x16,
+        default_scan_32x16_neighbors },
+      { default_scan_32x16, vp10_default_iscan_32x16,
+        default_scan_32x16_neighbors },
+      { default_scan_32x16, vp10_default_iscan_32x16,
+        default_scan_32x16_neighbors },
+      { default_scan_32x16, vp10_default_iscan_32x16,
+        default_scan_32x16_neighbors },
+      { default_scan_32x16, vp10_default_iscan_32x16,
+        default_scan_32x16_neighbors },
+      { default_scan_32x16, vp10_default_iscan_32x16,
+        default_scan_32x16_neighbors },
+      { mrow_scan_32x16, vp10_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+      { mrow_scan_32x16, vp10_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+      { mcol_scan_32x16, vp10_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+      { mrow_scan_32x16, vp10_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+      { mcol_scan_32x16, vp10_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+      { mrow_scan_32x16, vp10_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+      { mcol_scan_32x16, vp10_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+  }
+};
+
+#else   // CONFIG_EXT_TX
+
+const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+  { // TX_4X4
+    { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+    { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors },
+    { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors },
+    { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors } },
+  { // TX_8X8
+    { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+    { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors },
+    { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors },
+    { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors } },
+  { // TX_16X16
+    { default_scan_16x16, vp10_default_iscan_16x16,
+      default_scan_16x16_neighbors },
+    { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors },
+    { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors },
+    { default_scan_16x16, vp10_default_iscan_16x16,
+      default_scan_16x16_neighbors } },
+  { // TX_32X32
+    { default_scan_32x32, vp10_default_iscan_32x32,
+      default_scan_32x32_neighbors },
+    { default_scan_32x32, vp10_default_iscan_32x32,
+      default_scan_32x32_neighbors },
+    { default_scan_32x32, vp10_default_iscan_32x32,
+      default_scan_32x32_neighbors },
+    { default_scan_32x32, vp10_default_iscan_32x32,
+      default_scan_32x32_neighbors } }
+};
+#endif  // CONFIG_EXT_TX
diff --git a/av1/common/scan.h b/av1/common/scan.h
new file mode 100644
index 0000000..d2d9f35
--- /dev/null
+++ b/av1/common/scan.h
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_SCAN_H_
+#define VP10_COMMON_SCAN_H_
+
+#include "aom/vpx_integer.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/enums.h"
+#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_NEIGHBORS 2
+
+typedef struct {
+  const int16_t *scan;
+  const int16_t *iscan;
+  const int16_t *neighbors;
+} scan_order;
+
+extern const scan_order vp10_default_scan_orders[TX_SIZES];
+extern const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES];
+
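+// The coefficient context is the rounded average of the token-cache values
+// at the (up to) two already-coded neighbors of scan position c.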
+static INLINE int get_coef_context(const int16_t *neighbors,
+                                   const uint8_t *token_cache, int c) {
+  return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
+          token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >>
+         1;
+}
+
+static INLINE const scan_order *get_intra_scan(TX_SIZE tx_size,
+                                               TX_TYPE tx_type) {
+  return &vp10_intra_scan_orders[tx_size][tx_type];
+}
+
+#if CONFIG_EXT_TX
+extern const scan_order vp10_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
+
+static INLINE const scan_order *get_inter_scan(TX_SIZE tx_size,
+                                               TX_TYPE tx_type) {
+  return &vp10_inter_scan_orders[tx_size][tx_type];
+}
+#endif  // CONFIG_EXT_TX
+
+static INLINE const scan_order *get_scan(TX_SIZE tx_size, TX_TYPE tx_type,
+                                         int is_inter) {
+#if CONFIG_EXT_TX
+  return is_inter ? &vp10_inter_scan_orders[tx_size][tx_type]
+                  : &vp10_intra_scan_orders[tx_size][tx_type];
+#else
+  (void)is_inter;
+  return &vp10_intra_scan_orders[tx_size][tx_type];
+#endif  // CONFIG_EXT_TX
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_SCAN_H_
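
A minimal usage sketch for the scan API above. get_scan, get_coef_context,
TX_8X8, and DCT_DCT are the names declared in this codebase; token_cache and
the loop bounds are illustrative assumptions only:

    const scan_order *so = get_scan(TX_8X8, DCT_DCT, 1 /* is_inter */);
    uint8_t token_cache[64] = { 0 };  // hypothetical per-block token cache
    int c;
    for (c = 0; c < 64; ++c) {
      const int coeff_idx = so->scan[c];
      const int ctx = get_coef_context(so->neighbors, token_cache, c);
      // ... entropy-code the coefficient at coeff_idx using ctx, then
      // record its token value in token_cache[coeff_idx] ...
    }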
diff --git a/av1/common/seg_common.c b/av1/common/seg_common.c
new file mode 100644
index 0000000..f131c7b
--- /dev/null
+++ b/av1/common/seg_common.c
@@ -0,0 +1,61 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/blockd.h"
+#include "av1/common/loopfilter.h"
+#include "av1/common/seg_common.h"
+#include "av1/common/quant_common.h"
+
+static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 };
+
+static const int seg_feature_data_max[SEG_LVL_MAX] = { MAXQ, MAX_LOOP_FILTER, 3,
+                                                       0 };
+
+// These functions provide access to new segment level features.
+// Eventually these functions may be "optimized out", but for the moment,
+// the coding mechanism is still subject to change, so they provide a
+// convenient single point of change.
+
+void vp10_clearall_segfeatures(struct segmentation *seg) {
+  vp10_zero(seg->feature_data);
+  vp10_zero(seg->feature_mask);
+}
+
+void vp10_enable_segfeature(struct segmentation *seg, int segment_id,
+                            SEG_LVL_FEATURES feature_id) {
+  seg->feature_mask[segment_id] |= 1 << feature_id;
+}
+
+int vp10_seg_feature_data_max(SEG_LVL_FEATURES feature_id) {
+  return seg_feature_data_max[feature_id];
+}
+
+int vp10_is_segfeature_signed(SEG_LVL_FEATURES feature_id) {
+  return seg_feature_data_signed[feature_id];
+}
+
+void vp10_set_segdata(struct segmentation *seg, int segment_id,
+                      SEG_LVL_FEATURES feature_id, int seg_data) {
+  assert(seg_data <= seg_feature_data_max[feature_id]);
+  if (seg_data < 0) {
+    assert(seg_feature_data_signed[feature_id]);
+    assert(-seg_data <= seg_feature_data_max[feature_id]);
+  }
+
+  seg->feature_data[segment_id][feature_id] = seg_data;
+}
+
+const vpx_tree_index vp10_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
+  2, 4, 6, 8, 10, 12, 0, -1, -2, -3, -4, -5, -6, -7
+};
+
+// TBD? Functions to read and write segment data with range / validity checking
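
A short sketch of how the segmentation accessors above compose; the calls are
the ones defined in this file, while the segment id and delta value are
assumed purely for illustration:

    struct segmentation seg = { 0 };
    vp10_clearall_segfeatures(&seg);
    seg.enabled = 1;
    seg.abs_delta = SEGMENT_DELTADATA;
    // Give segment 1 an alternate quantizer delta of -20 (a signed feature,
    // range-checked against MAXQ by vp10_set_segdata).
    vp10_enable_segfeature(&seg, 1, SEG_LVL_ALT_Q);
    vp10_set_segdata(&seg, 1, SEG_LVL_ALT_Q, -20);
    // Readers then test segfeature_active(&seg, 1, SEG_LVL_ALT_Q) and fetch
    // the delta with get_segdata(&seg, 1, SEG_LVL_ALT_Q).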
diff --git a/av1/common/seg_common.h b/av1/common/seg_common.h
new file mode 100644
index 0000000..7a8fa8f
--- /dev/null
+++ b/av1/common/seg_common.h
@@ -0,0 +1,82 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_SEG_COMMON_H_
+#define VP10_COMMON_SEG_COMMON_H_
+
+#include "aom_dsp/prob.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SEGMENT_DELTADATA 0
+#define SEGMENT_ABSDATA 1
+
+#define MAX_SEGMENTS 8
+#define SEG_TREE_PROBS (MAX_SEGMENTS - 1)
+
+#define PREDICTION_PROBS 3
+
+// Segment level features.
+typedef enum {
+  SEG_LVL_ALT_Q = 0,      // Use alternate quantizer
+  SEG_LVL_ALT_LF = 1,     // Use alternate loop filter value
+  SEG_LVL_REF_FRAME = 2,  // Optional segment reference frame
+  SEG_LVL_SKIP = 3,       // Optional segment (0,0) + skip mode
+  SEG_LVL_MAX = 4         // Number of features supported
+} SEG_LVL_FEATURES;
+
+struct segmentation {
+  uint8_t enabled;
+  uint8_t update_map;
+  uint8_t update_data;
+  uint8_t abs_delta;
+  uint8_t temporal_update;
+
+  int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
+  unsigned int feature_mask[MAX_SEGMENTS];
+};
+
+struct segmentation_probs {
+  vpx_prob tree_probs[SEG_TREE_PROBS];
+  vpx_prob pred_probs[PREDICTION_PROBS];
+};
+
+static INLINE int segfeature_active(const struct segmentation *seg,
+                                    int segment_id,
+                                    SEG_LVL_FEATURES feature_id) {
+  return seg->enabled && (seg->feature_mask[segment_id] & (1 << feature_id));
+}
+
+void vp10_clearall_segfeatures(struct segmentation *seg);
+
+void vp10_enable_segfeature(struct segmentation *seg, int segment_id,
+                            SEG_LVL_FEATURES feature_id);
+
+int vp10_seg_feature_data_max(SEG_LVL_FEATURES feature_id);
+
+int vp10_is_segfeature_signed(SEG_LVL_FEATURES feature_id);
+
+void vp10_set_segdata(struct segmentation *seg, int segment_id,
+                      SEG_LVL_FEATURES feature_id, int seg_data);
+
+static INLINE int get_segdata(const struct segmentation *seg, int segment_id,
+                              SEG_LVL_FEATURES feature_id) {
+  return seg->feature_data[segment_id][feature_id];
+}
+
+extern const vpx_tree_index vp10_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_SEG_COMMON_H_
diff --git a/av1/common/thread_common.c b/av1/common/thread_common.c
new file mode 100644
index 0000000..ba91a46
--- /dev/null
+++ b/av1/common/thread_common.c
@@ -0,0 +1,343 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/thread_common.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/loopfilter.h"
+
+#if CONFIG_MULTITHREAD
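+// Spin with trylock for a bounded number of attempts before falling back to
+// a blocking lock; short waits then avoid the cost of putting the thread to
+// sleep in the kernel.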
+static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
+  const int kMaxTryLocks = 4000;
+  int locked = 0;
+  int i;
+
+  for (i = 0; i < kMaxTryLocks; ++i) {
+    if (!pthread_mutex_trylock(mutex)) {
+      locked = 1;
+      break;
+    }
+  }
+
+  if (!locked) pthread_mutex_lock(mutex);
+}
+#endif  // CONFIG_MULTITHREAD
+
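+// Wavefront synchronization: before filtering superblock (r, c), wait until
+// the row above has progressed at least nsync superblock columns past c.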
+static INLINE void sync_read(VP10LfSync *const lf_sync, int r, int c) {
+#if CONFIG_MULTITHREAD
+  const int nsync = lf_sync->sync_range;
+
+  if (r && !(c & (nsync - 1))) {
+    pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1];
+    mutex_lock(mutex);
+
+    while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
+      pthread_cond_wait(&lf_sync->cond_[r - 1], mutex);
+    }
+    pthread_mutex_unlock(mutex);
+  }
+#else
+  (void)lf_sync;
+  (void)r;
+  (void)c;
+#endif  // CONFIG_MULTITHREAD
+}
+
+static INLINE void sync_write(VP10LfSync *const lf_sync, int r, int c,
+                              const int sb_cols) {
+#if CONFIG_MULTITHREAD
+  const int nsync = lf_sync->sync_range;
+  int cur;
+  // Only signal when enough superblocks have been filtered for the next row
+  // to proceed.
+  int sig = 1;
+
+  if (c < sb_cols - 1) {
+    cur = c;
+    if (c % nsync) sig = 0;
+  } else {
+    cur = sb_cols + nsync;
+  }
+
+  if (sig) {
+    mutex_lock(&lf_sync->mutex_[r]);
+
+    lf_sync->cur_sb_col[r] = cur;
+
+    pthread_cond_signal(&lf_sync->cond_[r]);
+    pthread_mutex_unlock(&lf_sync->mutex_[r]);
+  }
+#else
+  (void)lf_sync;
+  (void)r;
+  (void)c;
+  (void)sb_cols;
+#endif  // CONFIG_MULTITHREAD
+}
+
+// Implement row loopfiltering for each thread.
+static INLINE void thread_loop_filter_rows(
+    const YV12_BUFFER_CONFIG *const frame_buffer, VP10_COMMON *const cm,
+    struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop,
+    int y_only, VP10LfSync *const lf_sync) {
+  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
+  const int sb_cols = mi_cols_aligned_to_sb(cm) >> cm->mib_size_log2;
+  int mi_row, mi_col;
+#if !CONFIG_EXT_PARTITION_TYPES
+  enum lf_path path;
+  LOOP_FILTER_MASK lfm;
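+  // Select a filtering path from the chroma subsampling: 4:2:0 and 4:4:4
+  // have specialized fast paths; any other sampling uses the slow path.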
+  if (y_only)
+    path = LF_PATH_444;
+  else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
+    path = LF_PATH_420;
+  else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
+    path = LF_PATH_444;
+  else
+    path = LF_PATH_SLOW;
+#endif  // !CONFIG_EXT_PARTITION_TYPES
+
+#if CONFIG_EXT_PARTITION
+  printf(
+      "STOPPING: This code has not been modified to work with the "
+      "extended coding unit size experiment");
+  exit(EXIT_FAILURE);
+#endif  // CONFIG_EXT_PARTITION
+
+  for (mi_row = start; mi_row < stop;
+       mi_row += lf_sync->num_workers * cm->mib_size) {
+    MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
+
+    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
+      const int r = mi_row >> cm->mib_size_log2;
+      const int c = mi_col >> cm->mib_size_log2;
+      int plane;
+
+      sync_read(lf_sync, r, c);
+
+      vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
+
+#if CONFIG_EXT_PARTITION_TYPES
+      for (plane = 0; plane < num_planes; ++plane)
+        vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row,
+                                       mi_col);
+#else
+      // TODO(JBB): Make setup_mask work for non-420 subsampling.
+      vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
+
+      vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
+      for (plane = 1; plane < num_planes; ++plane) {
+        switch (path) {
+          case LF_PATH_420:
+            vp10_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
+            break;
+          case LF_PATH_444:
+            vp10_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
+            break;
+          case LF_PATH_SLOW:
+            vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
+                                           mi_row, mi_col);
+            break;
+        }
+      }
+#endif  // CONFIG_EXT_PARTITION_TYPES
+      sync_write(lf_sync, r, c, sb_cols);
+    }
+  }
+}
+
+// Row-based multi-threaded loopfilter hook
+static int loop_filter_row_worker(VP10LfSync *const lf_sync,
+                                  LFWorkerData *const lf_data) {
+  thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+                          lf_data->start, lf_data->stop, lf_data->y_only,
+                          lf_sync);
+  return 1;
+}
+
+static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                                struct macroblockd_plane planes[MAX_MB_PLANE],
+                                int start, int stop, int y_only,
+                                VPxWorker *workers, int nworkers,
+                                VP10LfSync *lf_sync) {
+  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+  // Number of superblock rows.
+  const int sb_rows = mi_rows_aligned_to_sb(cm) >> cm->mib_size_log2;
+  // The decoder may allocate more threads than there are tile columns, so
+  // cap the number of workers at the tile-column count.
+  const int tile_cols = cm->tile_cols;
+  const int num_workers = VPXMIN(nworkers, tile_cols);
+  int i;
+
+#if CONFIG_EXT_PARTITION
+  printf(
+      "STOPPING: This code has not been modified to work with the "
+      "extended coding unit size experiment");
+  exit(EXIT_FAILURE);
+#endif  // CONFIG_EXT_PARTITION
+
+  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
+      num_workers > lf_sync->num_workers) {
+    vp10_loop_filter_dealloc(lf_sync);
+    vp10_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
+  }
+
+  // Initialize cur_sb_col to -1 for all SB rows.
+  memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
+
+  // Set up loopfilter thread data.
+  // The decoder is capping num_workers because it has been observed that using
+  // more threads on the loopfilter than there are cores will hurt performance
+  // on Android. This is because the system will only schedule the tile decode
+  // workers on cores equal to the number of tile columns. Then if the decoder
+  // tries to use more threads for the loopfilter, it will hurt performance
+  // because of contention. If the multithreading code changes in the future
+  // then the number of workers used by the loopfilter should be revisited.
+  for (i = 0; i < num_workers; ++i) {
+    VPxWorker *const worker = &workers[i];
+    LFWorkerData *const lf_data = &lf_sync->lfdata[i];
+
+    worker->hook = (VPxWorkerHook)loop_filter_row_worker;
+    worker->data1 = lf_sync;
+    worker->data2 = lf_data;
+
+    // Loopfilter data
+    vp10_loop_filter_data_reset(lf_data, frame, cm, planes);
+    lf_data->start = start + i * cm->mib_size;
+    lf_data->stop = stop;
+    lf_data->y_only = y_only;
+
+    // Start loopfiltering
+    if (i == num_workers - 1) {
+      winterface->execute(worker);
+    } else {
+      winterface->launch(worker);
+    }
+  }
+
+  // Wait until all rows are finished.
+  for (i = 0; i < num_workers; ++i) {
+    winterface->sync(&workers[i]);
+  }
+}
+
+void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                               struct macroblockd_plane planes[MAX_MB_PLANE],
+                               int frame_filter_level, int y_only,
+                               int partial_frame, VPxWorker *workers,
+                               int num_workers, VP10LfSync *lf_sync) {
+  int start_mi_row, end_mi_row, mi_rows_to_filter;
+
+  if (!frame_filter_level) return;
+
+  start_mi_row = 0;
+  mi_rows_to_filter = cm->mi_rows;
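+  // For partial-frame filtering, filter only a band of rows starting at the
+  // vertical midpoint (aligned down to a multiple of 8 MI rows).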
+  if (partial_frame && cm->mi_rows > 8) {
+    start_mi_row = cm->mi_rows >> 1;
+    start_mi_row &= 0xfffffff8;
+    mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
+  }
+  end_mi_row = start_mi_row + mi_rows_to_filter;
+  vp10_loop_filter_frame_init(cm, frame_filter_level);
+
+  loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, y_only,
+                      workers, num_workers, lf_sync);
+}
+
+// Set up nsync by width.
+static INLINE int get_sync_range(int width) {
+  // nsync numbers are picked by testing. For example, for 4k
+  // video, using 4 gives best performance.
+  if (width < 640)
+    return 1;
+  else if (width <= 1280)
+    return 2;
+  else if (width <= 4096)
+    return 4;
+  else
+    return 8;
+}
+
+// Allocate memory for lf row synchronization
+void vp10_loop_filter_alloc(VP10LfSync *lf_sync, VP10_COMMON *cm, int rows,
+                            int width, int num_workers) {
+  lf_sync->rows = rows;
+#if CONFIG_MULTITHREAD
+  {
+    int i;
+
+    CHECK_MEM_ERROR(cm, lf_sync->mutex_,
+                    vpx_malloc(sizeof(*lf_sync->mutex_) * rows));
+    if (lf_sync->mutex_) {
+      for (i = 0; i < rows; ++i) {
+        pthread_mutex_init(&lf_sync->mutex_[i], NULL);
+      }
+    }
+
+    CHECK_MEM_ERROR(cm, lf_sync->cond_,
+                    vpx_malloc(sizeof(*lf_sync->cond_) * rows));
+    if (lf_sync->cond_) {
+      for (i = 0; i < rows; ++i) {
+        pthread_cond_init(&lf_sync->cond_[i], NULL);
+      }
+    }
+  }
+#endif  // CONFIG_MULTITHREAD
+
+  CHECK_MEM_ERROR(cm, lf_sync->lfdata,
+                  vpx_malloc(num_workers * sizeof(*lf_sync->lfdata)));
+  lf_sync->num_workers = num_workers;
+
+  CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
+                  vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
+
+  // Set up nsync.
+  lf_sync->sync_range = get_sync_range(width);
+}
+
+// Deallocate loopfilter synchronization-related mutexes and data.
+void vp10_loop_filter_dealloc(VP10LfSync *lf_sync) {
+  if (lf_sync != NULL) {
+#if CONFIG_MULTITHREAD
+    int i;
+
+    if (lf_sync->mutex_ != NULL) {
+      for (i = 0; i < lf_sync->rows; ++i) {
+        pthread_mutex_destroy(&lf_sync->mutex_[i]);
+      }
+      vpx_free(lf_sync->mutex_);
+    }
+    if (lf_sync->cond_ != NULL) {
+      for (i = 0; i < lf_sync->rows; ++i) {
+        pthread_cond_destroy(&lf_sync->cond_[i]);
+      }
+      vpx_free(lf_sync->cond_);
+    }
+#endif  // CONFIG_MULTITHREAD
+    vpx_free(lf_sync->lfdata);
+    vpx_free(lf_sync->cur_sb_col);
+    // Clear the structure: the source of this call may be a resize, in which
+    // case this call will be followed by an _alloc() that may fail.
+    vp10_zero(*lf_sync);
+  }
+}
+
+// Accumulate frame counts. FRAME_COUNTS consists solely of 'unsigned int'
+// members, so we treat it as an array and sum over the whole length.
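+// Note this relies on FRAME_COUNTS containing no padding and no members of
+// any other type; if the struct ever changes, this aliasing trick breaks.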
+void vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts) {
+  unsigned int *const acc = (unsigned int *)&cm->counts;
+  const unsigned int *const cnt = (unsigned int *)counts;
+
+  const unsigned int n_counts = sizeof(FRAME_COUNTS) / sizeof(unsigned int);
+  unsigned int i;
+
+  for (i = 0; i < n_counts; i++) acc[i] += cnt[i];
+}
diff --git a/av1/common/thread_common.h b/av1/common/thread_common.h
new file mode 100644
index 0000000..3df9557
--- /dev/null
+++ b/av1/common/thread_common.h
@@ -0,0 +1,63 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_LOOPFILTER_THREAD_H_
+#define VP10_COMMON_LOOPFILTER_THREAD_H_
+#include "./vpx_config.h"
+#include "av1/common/loopfilter.h"
+#include "aom_util/vpx_thread.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Common;
+struct FRAME_COUNTS;
+
+// Loopfilter row synchronization
+typedef struct VP10LfSyncData {
+#if CONFIG_MULTITHREAD
+  pthread_mutex_t *mutex_;
+  pthread_cond_t *cond_;
+#endif
+  // For each superblock row, the column index of the most recently filtered
+  // superblock.
+  int *cur_sb_col;
+  // The optimal sync_range for different resolutions and platforms should be
+  // determined by testing. Currently, it is chosen to be a power-of-2 number.
+  int sync_range;
+  int rows;
+
+  // Row-based parallel loopfilter data
+  LFWorkerData *lfdata;
+  int num_workers;
+} VP10LfSync;
+
+// Allocate memory for loopfilter row synchronization.
+void vp10_loop_filter_alloc(VP10LfSync *lf_sync, struct VP10Common *cm,
+                            int rows, int width, int num_workers);
+
+// Deallocate loopfilter synchronization-related mutexes and data.
+void vp10_loop_filter_dealloc(VP10LfSync *lf_sync);
+
+// Multi-threaded loopfilter that uses the tile threads.
+void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP10Common *cm,
+                               struct macroblockd_plane planes[MAX_MB_PLANE],
+                               int frame_filter_level, int y_only,
+                               int partial_frame, VPxWorker *workers,
+                               int num_workers, VP10LfSync *lf_sync);
+
+void vp10_accumulate_frame_counts(struct VP10Common *cm,
+                                  struct FRAME_COUNTS *counts);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_LOOPFILTER_THREAD_H_
diff --git a/av1/common/tile_common.c b/av1/common/tile_common.c
new file mode 100644
index 0000000..e79734e
--- /dev/null
+++ b/av1/common/tile_common.c
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/tile_common.h"
+#include "av1/common/onyxc_int.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+void vp10_tile_set_row(TileInfo *tile, const VP10_COMMON *cm, int row) {
+  tile->mi_row_start = row * cm->tile_height;
+  tile->mi_row_end = VPXMIN(tile->mi_row_start + cm->tile_height, cm->mi_rows);
+}
+
+void vp10_tile_set_col(TileInfo *tile, const VP10_COMMON *cm, int col) {
+  tile->mi_col_start = col * cm->tile_width;
+  tile->mi_col_end = VPXMIN(tile->mi_col_start + cm->tile_width, cm->mi_cols);
+}
+
+void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) {
+  vp10_tile_set_row(tile, cm, row);
+  vp10_tile_set_col(tile, cm, col);
+}
+
+#if !CONFIG_EXT_TILE
+
+#if CONFIG_EXT_PARTITION
+#define MIN_TILE_WIDTH_MAX_SB 2
+#define MAX_TILE_WIDTH_MAX_SB 32
+#else
+#define MIN_TILE_WIDTH_MAX_SB 4
+#define MAX_TILE_WIDTH_MAX_SB 64
+#endif  // CONFIG_EXT_PARTITION
+
+static int get_min_log2_tile_cols(const int max_sb_cols) {
+  int min_log2 = 0;
+  while ((MAX_TILE_WIDTH_MAX_SB << min_log2) < max_sb_cols) ++min_log2;
+  return min_log2;
+}
+
+static int get_max_log2_tile_cols(const int max_sb_cols) {
+  int max_log2 = 1;
+  while ((max_sb_cols >> max_log2) >= MIN_TILE_WIDTH_MAX_SB) ++max_log2;
+  return max_log2 - 1;
+}
+
+void vp10_get_tile_n_bits(const int mi_cols, int *min_log2_tile_cols,
+                          int *max_log2_tile_cols) {
+  const int max_sb_cols =
+      ALIGN_POWER_OF_TWO(mi_cols, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2;
+  *min_log2_tile_cols = get_min_log2_tile_cols(max_sb_cols);
+  *max_log2_tile_cols = get_max_log2_tile_cols(max_sb_cols);
+  assert(*min_log2_tile_cols <= *max_log2_tile_cols);
+}
+#endif  // !CONFIG_EXT_TILE
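
A worked example for vp10_get_tile_n_bits, assuming 64x64 superblocks without
CONFIG_EXT_PARTITION (MAX_MIB_SIZE_LOG2 == 3, so the bounds above are 4 and 64
superblocks per tile): a 1920-pixel-wide frame has mi_cols == 240 and
max_sb_cols == 30. min_log2_tile_cols comes out 0, since a single tile of up
to 64 superblocks already covers all 30 columns; max_log2_tile_cols comes out
2, since four tiles leave 7 superblocks each (>= the 4-superblock minimum)
while eight would leave only 3. The frame may therefore use 1, 2, or 4 tile
columns.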
diff --git a/av1/common/tile_common.h b/av1/common/tile_common.h
new file mode 100644
index 0000000..a502173
--- /dev/null
+++ b/av1/common/tile_common.h
@@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_TILE_COMMON_H_
+#define VP10_COMMON_TILE_COMMON_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Common;
+
+typedef struct TileInfo {
+  int mi_row_start, mi_row_end;
+  int mi_col_start, mi_col_end;
+} TileInfo;
+
+// Initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on
+// 'cm->tile_(width|height)' and 'cm->mi_(rows|cols)'.
+void vp10_tile_init(TileInfo *tile, const struct VP10Common *cm, int row,
+                    int col);
+
+void vp10_tile_set_row(TileInfo *tile, const struct VP10Common *cm, int row);
+void vp10_tile_set_col(TileInfo *tile, const struct VP10Common *cm, int col);
+
+void vp10_get_tile_n_bits(const int mi_cols, int *min_log2_tile_cols,
+                          int *max_log2_tile_cols);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_TILE_COMMON_H_
diff --git a/av1/common/vp10_convolve.c b/av1/common/vp10_convolve.c
new file mode 100644
index 0000000..b62bae5
--- /dev/null
+++ b/av1/common/vp10_convolve.c
@@ -0,0 +1,353 @@
+#include <assert.h>
+#include <string.h>
+
+#include "./vp10_rtcd.h"
+#include "av1/common/vp10_convolve.h"
+#include "av1/common/filter.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_ports/mem.h"
+
+#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
+#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
+#define MAX_STEP (32)
+#define MAX_FILTER_TAP (12)
+
+void vp10_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
+                           int dst_stride, int w, int h,
+                           const InterpFilterParams filter_params,
+                           const int subpel_x_q4, int x_step_q4, int avg) {
+  int x, y;
+  int filter_size = filter_params.taps;
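+  // Back src up so the filter taps are centered on the output pixel.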
+  src -= filter_size / 2 - 1;
+  for (y = 0; y < h; ++y) {
+    int x_q4 = subpel_x_q4;
+    for (x = 0; x < w; ++x) {
+      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
+      const int16_t *x_filter = vp10_get_interp_filter_subpel_kernel(
+          filter_params, x_q4 & SUBPEL_MASK);
+      int k, sum = 0;
+      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
+      if (avg) {
+        dst[x] = ROUND_POWER_OF_TWO(
+            dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
+      } else {
+        dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+      }
+      x_q4 += x_step_q4;
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+void vp10_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
+                          int dst_stride, int w, int h,
+                          const InterpFilterParams filter_params,
+                          const int subpel_y_q4, int y_step_q4, int avg) {
+  int x, y;
+  int filter_size = filter_params.taps;
+  src -= src_stride * (filter_size / 2 - 1);
+
+  for (x = 0; x < w; ++x) {
+    int y_q4 = subpel_y_q4;
+    for (y = 0; y < h; ++y) {
+      const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
+      const int16_t *y_filter = vp10_get_interp_filter_subpel_kernel(
+          filter_params, y_q4 & SUBPEL_MASK);
+      int k, sum = 0;
+      for (k = 0; k < filter_size; ++k)
+        sum += src_y[k * src_stride] * y_filter[k];
+      if (avg) {
+        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
+            dst[y * dst_stride] +
+                clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
+            1);
+      } else {
+        dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+      }
+      y_q4 += y_step_q4;
+    }
+    ++src;
+    ++dst;
+  }
+}
+
+static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
+                          int dst_stride, int w, int h, int avg) {
+  if (avg == 0) {
+    int r;
+    for (r = 0; r < h; ++r) {
+      memcpy(dst, src, w);
+      src += src_stride;
+      dst += dst_stride;
+    }
+  } else {
+    int r, c;
+    for (r = 0; r < h; ++r) {
+      for (c = 0; c < w; ++c) {
+        dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1));
+      }
+      src += src_stride;
+      dst += dst_stride;
+    }
+  }
+}
+
+void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
+                   int dst_stride, int w, int h,
+#if CONFIG_DUAL_FILTER
+                   const INTERP_FILTER *interp_filter,
+#else
+                   const INTERP_FILTER interp_filter,
+#endif
+                   const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
+                   int y_step_q4, int ref_idx) {
+  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
+  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
+
+  assert(w <= MAX_BLOCK_WIDTH);
+  assert(h <= MAX_BLOCK_HEIGHT);
+  assert(y_step_q4 <= MAX_STEP);
+  assert(x_step_q4 <= MAX_STEP);
+
+  if (ignore_horiz && ignore_vert) {
+    convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx);
+  } else if (ignore_vert) {
+#if CONFIG_DUAL_FILTER
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+#else
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter);
+#endif
+    assert(filter_params.taps <= MAX_FILTER_TAP);
+    vp10_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
+                        subpel_x_q4, x_step_q4, ref_idx);
+  } else if (ignore_horiz) {
+#if CONFIG_DUAL_FILTER
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter[2 * ref_idx]);
+#else
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter);
+#endif
+    assert(filter_params.taps <= MAX_FILTER_TAP);
+    vp10_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
+                       subpel_y_q4, y_step_q4, ref_idx);
+  } else {
+    // temp's size is set to (maximum possible intermediate_height) *
+    // MAX_BLOCK_WIDTH
+    uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+                  MAX_FILTER_TAP) *
+                 MAX_BLOCK_WIDTH];
+    int temp_stride = MAX_BLOCK_WIDTH;
+#if CONFIG_DUAL_FILTER
+    InterpFilterParams filter_params_x =
+        vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+    InterpFilterParams filter_params_y =
+        vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
+    InterpFilterParams filter_params = filter_params_x;
+
+    // The filter size implies the required number of reference pixels for
+    // the second stage filtering. It is possible that the two directions
+    // require different filter sizes.
+    int filter_size = filter_params_y.taps;
+#else
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter);
+    int filter_size = filter_params.taps;
+#endif
+    int intermediate_height =
+        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+
+    assert(filter_params.taps <= MAX_FILTER_TAP);
+
+    vp10_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
+                        temp, temp_stride, w, intermediate_height,
+                        filter_params, subpel_x_q4, x_step_q4, 0);
+
+#if CONFIG_DUAL_FILTER
+    filter_params = filter_params_y;
+#else
+    filter_params = vp10_get_interp_filter_params(interp_filter);
+#endif
+    filter_size = filter_params.taps;
+    assert(filter_params.taps <= MAX_FILTER_TAP);
+
+    vp10_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
+                       dst, dst_stride, w, h, filter_params, subpel_y_q4,
+                       y_step_q4, ref_idx);
+  }
+}
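+
+/* Usage sketch (illustrative only, assuming the single-filter build and
+ * that EIGHTTAP names the 8-tap filter enum): a half-pel prediction in
+ * both directions of an unscaled 16x16 block for the first reference:
+ *
+ *   vp10_convolve(src, src_stride, dst, dst_stride, 16, 16,
+ *                 EIGHTTAP, 8, 16, 8, 16, 0);
+ *
+ * Here 8 is the subpel offset in 1/16-pel units (SUBPEL_BITS == 4) and a
+ * step of 16 means one full pel per output pixel, i.e. no scaling. For
+ * this call the two-stage path above would use
+ * intermediate_height = (((16 - 1) * 16 + 8) >> 4) + 8 = 23 rows.
+ */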
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
+                                  uint16_t *dst, int dst_stride, int w, int h,
+                                  const InterpFilterParams filter_params,
+                                  const int subpel_x_q4, int x_step_q4, int avg,
+                                  int bd) {
+  int x, y;
+  int filter_size = filter_params.taps;
+  src -= filter_size / 2 - 1;
+  for (y = 0; y < h; ++y) {
+    int x_q4 = subpel_x_q4;
+    for (x = 0; x < w; ++x) {
+      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
+      const int16_t *x_filter = vp10_get_interp_filter_subpel_kernel(
+          filter_params, x_q4 & SUBPEL_MASK);
+      int k, sum = 0;
+      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
+      if (avg)
+        dst[x] = ROUND_POWER_OF_TWO(
+            dst[x] +
+                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
+            1);
+      else
+        dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+      x_q4 += x_step_q4;
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+void vp10_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
+                                 uint16_t *dst, int dst_stride, int w, int h,
+                                 const InterpFilterParams filter_params,
+                                 const int subpel_y_q4, int y_step_q4, int avg,
+                                 int bd) {
+  int x, y;
+  int filter_size = filter_params.taps;
+  src -= src_stride * (filter_size / 2 - 1);
+
+  for (x = 0; x < w; ++x) {
+    int y_q4 = subpel_y_q4;
+    for (y = 0; y < h; ++y) {
+      const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
+      const int16_t *y_filter = vp10_get_interp_filter_subpel_kernel(
+          filter_params, y_q4 & SUBPEL_MASK);
+      int k, sum = 0;
+      for (k = 0; k < filter_size; ++k)
+        sum += src_y[k * src_stride] * y_filter[k];
+      if (avg) {
+        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
+            dst[y * dst_stride] +
+                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
+            1);
+      } else {
+        dst[y * dst_stride] =
+            clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+      }
+      y_q4 += y_step_q4;
+    }
+    ++src;
+    ++dst;
+  }
+}
+
+static void highbd_convolve_copy(const uint16_t *src, int src_stride,
+                                 uint16_t *dst, int dst_stride, int w, int h,
+                                 int avg, int bd) {
+  if (avg == 0) {
+    int r;
+    for (r = 0; r < h; ++r) {
+      memcpy(dst, src, w * sizeof(*src));
+      src += src_stride;
+      dst += dst_stride;
+    }
+  } else {
+    int r, c;
+    for (r = 0; r < h; ++r) {
+      for (c = 0; c < w; ++c) {
+        dst[c] = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst[c] + src[c], 1), bd);
+      }
+      src += src_stride;
+      dst += dst_stride;
+    }
+  }
+}
+
+void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
+                          int dst_stride, int w, int h,
+#if CONFIG_DUAL_FILTER
+                          const INTERP_FILTER *interp_filter,
+#else
+                          const INTERP_FILTER interp_filter,
+#endif
+                          const int subpel_x_q4, int x_step_q4,
+                          const int subpel_y_q4, int y_step_q4, int ref_idx,
+                          int bd) {
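+  // High-bitdepth planes store uint16_t samples but are passed around as
+  // uint8_t pointers; CONVERT_TO_SHORTPTR recovers the real buffer pointer.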
+  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
+  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
+
+  assert(w <= MAX_BLOCK_WIDTH);
+  assert(h <= MAX_BLOCK_HEIGHT);
+  assert(y_step_q4 <= MAX_STEP);
+  assert(x_step_q4 <= MAX_STEP);
+
+  if (ignore_horiz && ignore_vert) {
+    highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
+  } else if (ignore_vert) {
+#if CONFIG_DUAL_FILTER
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+#else
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter);
+#endif
+    vp10_highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h,
+                               filter_params, subpel_x_q4, x_step_q4, ref_idx,
+                               bd);
+  } else if (ignore_horiz) {
+#if CONFIG_DUAL_FILTER
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
+#else
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter);
+#endif
+    vp10_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
+                              filter_params, subpel_y_q4, y_step_q4, ref_idx,
+                              bd);
+  } else {
+    // temp's size is set to (maximum possible intermediate_height) *
+    // MAX_BLOCK_WIDTH
+    uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+                   MAX_FILTER_TAP) *
+                  MAX_BLOCK_WIDTH];
+    int temp_stride = MAX_BLOCK_WIDTH;
+
+#if CONFIG_DUAL_FILTER
+    InterpFilterParams filter_params_x =
+        vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+    InterpFilterParams filter_params_y =
+        vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
+    InterpFilterParams filter_params = filter_params_x;
+    int filter_size = filter_params_y.taps;
+#else
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter);
+    int filter_size = filter_params.taps;
+#endif
+
+    int intermediate_height =
+        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+
+    vp10_highbd_convolve_horiz(
+        src - src_stride * (filter_size / 2 - 1), src_stride, temp, temp_stride,
+        w, intermediate_height, filter_params, subpel_x_q4, x_step_q4, 0, bd);
+
+#if CONFIG_DUAL_FILTER
+    filter_params = filter_params_y;
+#endif
+    filter_size = filter_params.taps;
+    assert(filter_params.taps <= MAX_FILTER_TAP);
+
+    vp10_highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
+                              temp_stride, dst, dst_stride, w, h, filter_params,
+                              subpel_y_q4, y_step_q4, ref_idx, bd);
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/vp10_convolve.h b/av1/common/vp10_convolve.h
new file mode 100644
index 0000000..9343402
--- /dev/null
+++ b/av1/common/vp10_convolve.h
@@ -0,0 +1,35 @@
+#ifndef VP10_COMMON_VP10_CONVOLVE_H_
+#define VP10_COMMON_VP10_CONVOLVE_H_
+#include "av1/common/filter.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
+                   int dst_stride, int w, int h,
+#if CONFIG_DUAL_FILTER
+                   const INTERP_FILTER *interp_filter,
+#else
+                   const INTERP_FILTER interp_filter,
+#endif
+                   const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
+                   int y_step_q4, int ref_idx);
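+
+/* The subpel offsets are in 1/16-pel units (4 fractional bits, hence the
+ * _q4 suffix) and a step of 16 per output pixel means no scaling; see the
+ * definitions in vp10_convolve.c. */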
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
+                          int dst_stride, int w, int h,
+#if CONFIG_DUAL_FILTER
+                          const INTERP_FILTER *interp_filter,
+#else
+                          const INTERP_FILTER interp_filter,
+#endif
+                          const int subpel_x_q4, int x_step_q4,
+                          const int subpel_y_q4, int y_step_q4, int ref_idx,
+                          int bd);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_COMMON_VP10_CONVOLVE_H_
diff --git a/av1/common/vp10_fwd_txfm.c b/av1/common/vp10_fwd_txfm.c
new file mode 100644
index 0000000..eb1c018
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm.c
@@ -0,0 +1,814 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "av1/common/vp10_fwd_txfm.h"
+
+void vp10_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
+  // The 2D transform is done with two passes that are structurally the
+  // same. The first pass transforms the columns and transposes the
+  // results. Because that output is transposed, the second pass again
+  // transforms columns (which are really the transposed rows) and
+  // transposes its results, so the coefficients land back in normal
+  // row order.
+  int pass;
+  // We need an intermediate buffer between passes.
+  tran_low_t intermediate[4 * 4];
+  const int16_t *in_pass0 = input;
+  const tran_low_t *in = NULL;
+  tran_low_t *out = intermediate;
+  // Do the two transform/transpose passes
+  for (pass = 0; pass < 2; ++pass) {
+    tran_high_t input[4];      // canbe16: fits in 16 bits (shadows the arg)
+    tran_high_t step[4];       // canbe16
+    tran_high_t temp1, temp2;  // needs32: requires 32 bits
+    int i;
+    for (i = 0; i < 4; ++i) {
+      // Load inputs.
+      if (0 == pass) {
+        input[0] = in_pass0[0 * stride] * 16;
+        input[1] = in_pass0[1 * stride] * 16;
+        input[2] = in_pass0[2 * stride] * 16;
+        input[3] = in_pass0[3 * stride] * 16;
+        if (i == 0 && input[0]) {
+          input[0] += 1;
+        }
+      } else {
+        input[0] = in[0 * 4];
+        input[1] = in[1 * 4];
+        input[2] = in[2 * 4];
+        input[3] = in[3 * 4];
+      }
+      // Transform.
+      step[0] = input[0] + input[3];
+      step[1] = input[1] + input[2];
+      step[2] = input[1] - input[2];
+      step[3] = input[0] - input[3];
+      temp1 = (step[0] + step[1]) * cospi_16_64;
+      temp2 = (step[0] - step[1]) * cospi_16_64;
+      out[0] = (tran_low_t)fdct_round_shift(temp1);
+      out[2] = (tran_low_t)fdct_round_shift(temp2);
+      temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
+      temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
+      out[1] = (tran_low_t)fdct_round_shift(temp1);
+      out[3] = (tran_low_t)fdct_round_shift(temp2);
+      // Do next column (which is a transposed row in second/horizontal pass)
+      in_pass0++;
+      in++;
+      out += 4;
+    }
+    // Setup in/out for next pass.
+    in = intermediate;
+    out = output;
+  }
+
+  {
+    int i, j;
+    for (i = 0; i < 4; ++i) {
+      for (j = 0; j < 4; ++j) output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
+    }
+  }
+}
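+
+/* Sanity-check sketch (illustrative, not part of this change): for a
+ * constant block essentially all of the energy should land in the DC term:
+ *
+ *   int16_t in[4 * 4] = { [0 ... 15] = 1 };  // GCC range initializer
+ *   tran_low_t out[4 * 4];
+ *   vp10_fdct4x4_c(in, out, 4);
+ *   // out[0] carries the scaled DC; the remaining coefficients are zero
+ *   // up to the +1 rounding tweak applied to input[0] above.
+ */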
+
+void vp10_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) {
+  int r, c;
+  tran_low_t sum = 0;
+  for (r = 0; r < 4; ++r)
+    for (c = 0; c < 4; ++c) sum += input[r * stride + c];
+
+  output[0] = sum << 1;
+  output[1] = 0;
+}
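+
+// The *_1 variants compute only the DC coefficient (cheap approximations
+// used by skip/RD shortcuts); the scaling matches the full transform, e.g.
+// 2 * sum here equals out[0] of vp10_fdct4x4_c for a constant block.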
+
+void vp10_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
+                    int stride) {
+  int i, j;
+  tran_low_t intermediate[64];
+  int pass;
+  tran_low_t *output = intermediate;
+  const tran_low_t *in = NULL;
+
+  // Two passes: pass 0 transforms the columns, pass 1 the transposed rows.
+  for (pass = 0; pass < 2; ++pass) {
+    tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;  // canbe16
+    tran_high_t t0, t1, t2, t3;                  // needs32
+    tran_high_t x0, x1, x2, x3;                  // canbe16
+
+    int i;
+    for (i = 0; i < 8; i++) {
+      // stage 1
+      if (pass == 0) {
+        s0 = (input[0 * stride] + input[7 * stride]) * 4;
+        s1 = (input[1 * stride] + input[6 * stride]) * 4;
+        s2 = (input[2 * stride] + input[5 * stride]) * 4;
+        s3 = (input[3 * stride] + input[4 * stride]) * 4;
+        s4 = (input[3 * stride] - input[4 * stride]) * 4;
+        s5 = (input[2 * stride] - input[5 * stride]) * 4;
+        s6 = (input[1 * stride] - input[6 * stride]) * 4;
+        s7 = (input[0 * stride] - input[7 * stride]) * 4;
+        ++input;
+      } else {
+        s0 = in[0 * 8] + in[7 * 8];
+        s1 = in[1 * 8] + in[6 * 8];
+        s2 = in[2 * 8] + in[5 * 8];
+        s3 = in[3 * 8] + in[4 * 8];
+        s4 = in[3 * 8] - in[4 * 8];
+        s5 = in[2 * 8] - in[5 * 8];
+        s6 = in[1 * 8] - in[6 * 8];
+        s7 = in[0 * 8] - in[7 * 8];
+        ++in;
+      }
+
+      // fdct4(step, step);
+      x0 = s0 + s3;
+      x1 = s1 + s2;
+      x2 = s1 - s2;
+      x3 = s0 - s3;
+      t0 = (x0 + x1) * cospi_16_64;
+      t1 = (x0 - x1) * cospi_16_64;
+      t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
+      t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
+      output[0] = (tran_low_t)fdct_round_shift(t0);
+      output[2] = (tran_low_t)fdct_round_shift(t2);
+      output[4] = (tran_low_t)fdct_round_shift(t1);
+      output[6] = (tran_low_t)fdct_round_shift(t3);
+
+      // Stage 2
+      t0 = (s6 - s5) * cospi_16_64;
+      t1 = (s6 + s5) * cospi_16_64;
+      t2 = fdct_round_shift(t0);
+      t3 = fdct_round_shift(t1);
+
+      // Stage 3
+      x0 = s4 + t2;
+      x1 = s4 - t2;
+      x2 = s7 - t3;
+      x3 = s7 + t3;
+
+      // Stage 4
+      t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+      t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+      t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+      t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+      output[1] = (tran_low_t)fdct_round_shift(t0);
+      output[3] = (tran_low_t)fdct_round_shift(t2);
+      output[5] = (tran_low_t)fdct_round_shift(t1);
+      output[7] = (tran_low_t)fdct_round_shift(t3);
+      output += 8;
+    }
+    in = intermediate;
+    output = final_output;
+  }
+
+  // Final normalization: scale every coefficient down by 2.
+  for (i = 0; i < 8; ++i) {
+    for (j = 0; j < 8; ++j) final_output[j + i * 8] /= 2;
+  }
+}
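+
+// Note the normalization above uses integer division (rounds toward zero)
+// rather than the round-to-nearest shift of the 4x4 transform; SIMD
+// implementations have to match this exactly to stay bit-identical.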
+
+void vp10_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
+  int r, c;
+  tran_low_t sum = 0;
+  for (r = 0; r < 8; ++r)
+    for (c = 0; c < 8; ++c) sum += input[r * stride + c];
+
+  output[0] = sum;
+  output[1] = 0;
+}
+
+void vp10_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
+  // The 2D transform is done with two passes that are structurally the
+  // same. The first pass transforms the columns and transposes the
+  // results. Because that output is transposed, the second pass again
+  // transforms columns (which are really the transposed rows) and
+  // transposes its results, so the coefficients land back in normal
+  // row order.
+  int pass;
+  // We need an intermediate buffer between passes.
+  tran_low_t intermediate[256];
+  const int16_t *in_pass0 = input;
+  const tran_low_t *in = NULL;
+  tran_low_t *out = intermediate;
+  // Do the two transform/transpose passes
+  for (pass = 0; pass < 2; ++pass) {
+    tran_high_t step1[8];      // canbe16
+    tran_high_t step2[8];      // canbe16
+    tran_high_t step3[8];      // canbe16
+    tran_high_t input[8];      // canbe16
+    tran_high_t temp1, temp2;  // needs32
+    int i;
+    for (i = 0; i < 16; i++) {
+      if (0 == pass) {
+        // Calculate input for the first 8 results.
+        input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
+        input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
+        input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
+        input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
+        input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
+        input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
+        input[6] = (in_pass0[6 * stride] + in_pass0[9 * stride]) * 4;
+        input[7] = (in_pass0[7 * stride] + in_pass0[8 * stride]) * 4;
+        // Calculate input for the next 8 results.
+        step1[0] = (in_pass0[7 * stride] - in_pass0[8 * stride]) * 4;
+        step1[1] = (in_pass0[6 * stride] - in_pass0[9 * stride]) * 4;
+        step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
+        step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
+        step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
+        step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
+        step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
+        step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
+      } else {
+        // Calculate input for the first 8 results.
+        input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
+        input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
+        input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
+        input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
+        input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
+        input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
+        input[6] = ((in[6 * 16] + 1) >> 2) + ((in[9 * 16] + 1) >> 2);
+        input[7] = ((in[7 * 16] + 1) >> 2) + ((in[8 * 16] + 1) >> 2);
+        // Calculate input for the next 8 results.
+        step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[8 * 16] + 1) >> 2);
+        step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[9 * 16] + 1) >> 2);
+        step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
+        step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
+        step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
+        step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
+        step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
+        step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
+      }
+      // Work on the first eight values; fdct8(input, even_results);
+      {
+        tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;  // canbe16
+        tran_high_t t0, t1, t2, t3;                  // needs32
+        tran_high_t x0, x1, x2, x3;                  // canbe16
+
+        // stage 1
+        s0 = input[0] + input[7];
+        s1 = input[1] + input[6];
+        s2 = input[2] + input[5];
+        s3 = input[3] + input[4];
+        s4 = input[3] - input[4];
+        s5 = input[2] - input[5];
+        s6 = input[1] - input[6];
+        s7 = input[0] - input[7];
+
+        // fdct4(step, step);
+        x0 = s0 + s3;
+        x1 = s1 + s2;
+        x2 = s1 - s2;
+        x3 = s0 - s3;
+        t0 = (x0 + x1) * cospi_16_64;
+        t1 = (x0 - x1) * cospi_16_64;
+        t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
+        t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
+        out[0] = (tran_low_t)fdct_round_shift(t0);
+        out[4] = (tran_low_t)fdct_round_shift(t2);
+        out[8] = (tran_low_t)fdct_round_shift(t1);
+        out[12] = (tran_low_t)fdct_round_shift(t3);
+
+        // Stage 2
+        t0 = (s6 - s5) * cospi_16_64;
+        t1 = (s6 + s5) * cospi_16_64;
+        t2 = fdct_round_shift(t0);
+        t3 = fdct_round_shift(t1);
+
+        // Stage 3
+        x0 = s4 + t2;
+        x1 = s4 - t2;
+        x2 = s7 - t3;
+        x3 = s7 + t3;
+
+        // Stage 4
+        t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+        t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+        t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+        t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+        out[2] = (tran_low_t)fdct_round_shift(t0);
+        out[6] = (tran_low_t)fdct_round_shift(t2);
+        out[10] = (tran_low_t)fdct_round_shift(t1);
+        out[14] = (tran_low_t)fdct_round_shift(t3);
+      }
+      // Work on the next eight values; step1 -> odd_results
+      {
+        // step 2
+        temp1 = (step1[5] - step1[2]) * cospi_16_64;
+        temp2 = (step1[4] - step1[3]) * cospi_16_64;
+        step2[2] = fdct_round_shift(temp1);
+        step2[3] = fdct_round_shift(temp2);
+        temp1 = (step1[4] + step1[3]) * cospi_16_64;
+        temp2 = (step1[5] + step1[2]) * cospi_16_64;
+        step2[4] = fdct_round_shift(temp1);
+        step2[5] = fdct_round_shift(temp2);
+        // step 3
+        step3[0] = step1[0] + step2[3];
+        step3[1] = step1[1] + step2[2];
+        step3[2] = step1[1] - step2[2];
+        step3[3] = step1[0] - step2[3];
+        step3[4] = step1[7] - step2[4];
+        step3[5] = step1[6] - step2[5];
+        step3[6] = step1[6] + step2[5];
+        step3[7] = step1[7] + step2[4];
+        // step 4
+        temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
+        temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
+        step2[1] = fdct_round_shift(temp1);
+        step2[2] = fdct_round_shift(temp2);
+        temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
+        temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
+        step2[5] = fdct_round_shift(temp1);
+        step2[6] = fdct_round_shift(temp2);
+        // step 5
+        step1[0] = step3[0] + step2[1];
+        step1[1] = step3[0] - step2[1];
+        step1[2] = step3[3] + step2[2];
+        step1[3] = step3[3] - step2[2];
+        step1[4] = step3[4] - step2[5];
+        step1[5] = step3[4] + step2[5];
+        step1[6] = step3[7] - step2[6];
+        step1[7] = step3[7] + step2[6];
+        // step 6
+        temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
+        temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
+        out[1] = (tran_low_t)fdct_round_shift(temp1);
+        out[9] = (tran_low_t)fdct_round_shift(temp2);
+        temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
+        temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
+        out[5] = (tran_low_t)fdct_round_shift(temp1);
+        out[13] = (tran_low_t)fdct_round_shift(temp2);
+        temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
+        temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
+        out[3] = (tran_low_t)fdct_round_shift(temp1);
+        out[11] = (tran_low_t)fdct_round_shift(temp2);
+        temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
+        temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
+        out[7] = (tran_low_t)fdct_round_shift(temp1);
+        out[15] = (tran_low_t)fdct_round_shift(temp2);
+      }
+      // Do next column (which is a transposed row in second/horizontal pass)
+      in++;
+      in_pass0++;
+      out += 16;
+    }
+    // Setup in/out for next pass.
+    in = intermediate;
+    out = output;
+  }
+}
+
+void vp10_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) {
+  int r, c;
+  tran_low_t sum = 0;
+  for (r = 0; r < 16; ++r)
+    for (c = 0; c < 16; ++c) sum += input[r * stride + c];
+
+  output[0] = sum >> 1;
+  output[1] = 0;
+}
+
+static INLINE tran_high_t dct_32_round(tran_high_t input) {
+  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+  // TODO(debargha, peter.derivaz): Find new bounds for this assert,
+  // and make the bounds consts.
+  // assert(-131072 <= rv && rv <= 131071);
+  return rv;
+}
+
+static INLINE tran_high_t half_round_shift(tran_high_t input) {
+  tran_high_t rv = (input + 1 + (input < 0)) >> 2;
+  return rv;
+}
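+
+/* dct_32_round drops the DCT_CONST_BITS (14) fractional bits of a
+ * cospi-weighted sum with round-to-nearest; half_round_shift divides by 4
+ * with a bias that keeps rounding symmetric around zero, e.g.
+ * half_round_shift(5) == 1 and half_round_shift(-5) == -1. */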
+
+void vp10_fdct32(const tran_high_t *input, tran_high_t *output, int round) {
+  tran_high_t step[32];
+  // Stage 1
+  step[0] = input[0] + input[(32 - 1)];
+  step[1] = input[1] + input[(32 - 2)];
+  step[2] = input[2] + input[(32 - 3)];
+  step[3] = input[3] + input[(32 - 4)];
+  step[4] = input[4] + input[(32 - 5)];
+  step[5] = input[5] + input[(32 - 6)];
+  step[6] = input[6] + input[(32 - 7)];
+  step[7] = input[7] + input[(32 - 8)];
+  step[8] = input[8] + input[(32 - 9)];
+  step[9] = input[9] + input[(32 - 10)];
+  step[10] = input[10] + input[(32 - 11)];
+  step[11] = input[11] + input[(32 - 12)];
+  step[12] = input[12] + input[(32 - 13)];
+  step[13] = input[13] + input[(32 - 14)];
+  step[14] = input[14] + input[(32 - 15)];
+  step[15] = input[15] + input[(32 - 16)];
+  step[16] = -input[16] + input[(32 - 17)];
+  step[17] = -input[17] + input[(32 - 18)];
+  step[18] = -input[18] + input[(32 - 19)];
+  step[19] = -input[19] + input[(32 - 20)];
+  step[20] = -input[20] + input[(32 - 21)];
+  step[21] = -input[21] + input[(32 - 22)];
+  step[22] = -input[22] + input[(32 - 23)];
+  step[23] = -input[23] + input[(32 - 24)];
+  step[24] = -input[24] + input[(32 - 25)];
+  step[25] = -input[25] + input[(32 - 26)];
+  step[26] = -input[26] + input[(32 - 27)];
+  step[27] = -input[27] + input[(32 - 28)];
+  step[28] = -input[28] + input[(32 - 29)];
+  step[29] = -input[29] + input[(32 - 30)];
+  step[30] = -input[30] + input[(32 - 31)];
+  step[31] = -input[31] + input[(32 - 32)];
+
+  // Stage 2
+  output[0] = step[0] + step[16 - 1];
+  output[1] = step[1] + step[16 - 2];
+  output[2] = step[2] + step[16 - 3];
+  output[3] = step[3] + step[16 - 4];
+  output[4] = step[4] + step[16 - 5];
+  output[5] = step[5] + step[16 - 6];
+  output[6] = step[6] + step[16 - 7];
+  output[7] = step[7] + step[16 - 8];
+  output[8] = -step[8] + step[16 - 9];
+  output[9] = -step[9] + step[16 - 10];
+  output[10] = -step[10] + step[16 - 11];
+  output[11] = -step[11] + step[16 - 12];
+  output[12] = -step[12] + step[16 - 13];
+  output[13] = -step[13] + step[16 - 14];
+  output[14] = -step[14] + step[16 - 15];
+  output[15] = -step[15] + step[16 - 16];
+
+  output[16] = step[16];
+  output[17] = step[17];
+  output[18] = step[18];
+  output[19] = step[19];
+
+  output[20] = dct_32_round((-step[20] + step[27]) * cospi_16_64);
+  output[21] = dct_32_round((-step[21] + step[26]) * cospi_16_64);
+  output[22] = dct_32_round((-step[22] + step[25]) * cospi_16_64);
+  output[23] = dct_32_round((-step[23] + step[24]) * cospi_16_64);
+
+  output[24] = dct_32_round((step[24] + step[23]) * cospi_16_64);
+  output[25] = dct_32_round((step[25] + step[22]) * cospi_16_64);
+  output[26] = dct_32_round((step[26] + step[21]) * cospi_16_64);
+  output[27] = dct_32_round((step[27] + step[20]) * cospi_16_64);
+
+  output[28] = step[28];
+  output[29] = step[29];
+  output[30] = step[30];
+  output[31] = step[31];
+
+  // Scale the magnitude down by a factor of 4 so that the intermediate
+  // values stay within the range of 16 bits.
+  if (round) {
+    output[0] = half_round_shift(output[0]);
+    output[1] = half_round_shift(output[1]);
+    output[2] = half_round_shift(output[2]);
+    output[3] = half_round_shift(output[3]);
+    output[4] = half_round_shift(output[4]);
+    output[5] = half_round_shift(output[5]);
+    output[6] = half_round_shift(output[6]);
+    output[7] = half_round_shift(output[7]);
+    output[8] = half_round_shift(output[8]);
+    output[9] = half_round_shift(output[9]);
+    output[10] = half_round_shift(output[10]);
+    output[11] = half_round_shift(output[11]);
+    output[12] = half_round_shift(output[12]);
+    output[13] = half_round_shift(output[13]);
+    output[14] = half_round_shift(output[14]);
+    output[15] = half_round_shift(output[15]);
+
+    output[16] = half_round_shift(output[16]);
+    output[17] = half_round_shift(output[17]);
+    output[18] = half_round_shift(output[18]);
+    output[19] = half_round_shift(output[19]);
+    output[20] = half_round_shift(output[20]);
+    output[21] = half_round_shift(output[21]);
+    output[22] = half_round_shift(output[22]);
+    output[23] = half_round_shift(output[23]);
+    output[24] = half_round_shift(output[24]);
+    output[25] = half_round_shift(output[25]);
+    output[26] = half_round_shift(output[26]);
+    output[27] = half_round_shift(output[27]);
+    output[28] = half_round_shift(output[28]);
+    output[29] = half_round_shift(output[29]);
+    output[30] = half_round_shift(output[30]);
+    output[31] = half_round_shift(output[31]);
+  }
+
+  // Stage 3
+  step[0] = output[0] + output[(8 - 1)];
+  step[1] = output[1] + output[(8 - 2)];
+  step[2] = output[2] + output[(8 - 3)];
+  step[3] = output[3] + output[(8 - 4)];
+  step[4] = -output[4] + output[(8 - 5)];
+  step[5] = -output[5] + output[(8 - 6)];
+  step[6] = -output[6] + output[(8 - 7)];
+  step[7] = -output[7] + output[(8 - 8)];
+  step[8] = output[8];
+  step[9] = output[9];
+  step[10] = dct_32_round((-output[10] + output[13]) * cospi_16_64);
+  step[11] = dct_32_round((-output[11] + output[12]) * cospi_16_64);
+  step[12] = dct_32_round((output[12] + output[11]) * cospi_16_64);
+  step[13] = dct_32_round((output[13] + output[10]) * cospi_16_64);
+  step[14] = output[14];
+  step[15] = output[15];
+
+  step[16] = output[16] + output[23];
+  step[17] = output[17] + output[22];
+  step[18] = output[18] + output[21];
+  step[19] = output[19] + output[20];
+  step[20] = -output[20] + output[19];
+  step[21] = -output[21] + output[18];
+  step[22] = -output[22] + output[17];
+  step[23] = -output[23] + output[16];
+  step[24] = -output[24] + output[31];
+  step[25] = -output[25] + output[30];
+  step[26] = -output[26] + output[29];
+  step[27] = -output[27] + output[28];
+  step[28] = output[28] + output[27];
+  step[29] = output[29] + output[26];
+  step[30] = output[30] + output[25];
+  step[31] = output[31] + output[24];
+
+  // Stage 4
+  output[0] = step[0] + step[3];
+  output[1] = step[1] + step[2];
+  output[2] = -step[2] + step[1];
+  output[3] = -step[3] + step[0];
+  output[4] = step[4];
+  output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64);
+  output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64);
+  output[7] = step[7];
+  output[8] = step[8] + step[11];
+  output[9] = step[9] + step[10];
+  output[10] = -step[10] + step[9];
+  output[11] = -step[11] + step[8];
+  output[12] = -step[12] + step[15];
+  output[13] = -step[13] + step[14];
+  output[14] = step[14] + step[13];
+  output[15] = step[15] + step[12];
+
+  output[16] = step[16];
+  output[17] = step[17];
+  output[18] = dct_32_round(step[18] * -cospi_8_64 + step[29] * cospi_24_64);
+  output[19] = dct_32_round(step[19] * -cospi_8_64 + step[28] * cospi_24_64);
+  output[20] = dct_32_round(step[20] * -cospi_24_64 + step[27] * -cospi_8_64);
+  output[21] = dct_32_round(step[21] * -cospi_24_64 + step[26] * -cospi_8_64);
+  output[22] = step[22];
+  output[23] = step[23];
+  output[24] = step[24];
+  output[25] = step[25];
+  output[26] = dct_32_round(step[26] * cospi_24_64 + step[21] * -cospi_8_64);
+  output[27] = dct_32_round(step[27] * cospi_24_64 + step[20] * -cospi_8_64);
+  output[28] = dct_32_round(step[28] * cospi_8_64 + step[19] * cospi_24_64);
+  output[29] = dct_32_round(step[29] * cospi_8_64 + step[18] * cospi_24_64);
+  output[30] = step[30];
+  output[31] = step[31];
+
+  // Stage 5
+  step[0] = dct_32_round((output[0] + output[1]) * cospi_16_64);
+  step[1] = dct_32_round((-output[1] + output[0]) * cospi_16_64);
+  step[2] = dct_32_round(output[2] * cospi_24_64 + output[3] * cospi_8_64);
+  step[3] = dct_32_round(output[3] * cospi_24_64 - output[2] * cospi_8_64);
+  step[4] = output[4] + output[5];
+  step[5] = -output[5] + output[4];
+  step[6] = -output[6] + output[7];
+  step[7] = output[7] + output[6];
+  step[8] = output[8];
+  step[9] = dct_32_round(output[9] * -cospi_8_64 + output[14] * cospi_24_64);
+  step[10] = dct_32_round(output[10] * -cospi_24_64 + output[13] * -cospi_8_64);
+  step[11] = output[11];
+  step[12] = output[12];
+  step[13] = dct_32_round(output[13] * cospi_24_64 + output[10] * -cospi_8_64);
+  step[14] = dct_32_round(output[14] * cospi_8_64 + output[9] * cospi_24_64);
+  step[15] = output[15];
+
+  step[16] = output[16] + output[19];
+  step[17] = output[17] + output[18];
+  step[18] = -output[18] + output[17];
+  step[19] = -output[19] + output[16];
+  step[20] = -output[20] + output[23];
+  step[21] = -output[21] + output[22];
+  step[22] = output[22] + output[21];
+  step[23] = output[23] + output[20];
+  step[24] = output[24] + output[27];
+  step[25] = output[25] + output[26];
+  step[26] = -output[26] + output[25];
+  step[27] = -output[27] + output[24];
+  step[28] = -output[28] + output[31];
+  step[29] = -output[29] + output[30];
+  step[30] = output[30] + output[29];
+  step[31] = output[31] + output[28];
+
+  // Stage 6
+  output[0] = step[0];
+  output[1] = step[1];
+  output[2] = step[2];
+  output[3] = step[3];
+  output[4] = dct_32_round(step[4] * cospi_28_64 + step[7] * cospi_4_64);
+  output[5] = dct_32_round(step[5] * cospi_12_64 + step[6] * cospi_20_64);
+  output[6] = dct_32_round(step[6] * cospi_12_64 + step[5] * -cospi_20_64);
+  output[7] = dct_32_round(step[7] * cospi_28_64 + step[4] * -cospi_4_64);
+  output[8] = step[8] + step[9];
+  output[9] = -step[9] + step[8];
+  output[10] = -step[10] + step[11];
+  output[11] = step[11] + step[10];
+  output[12] = step[12] + step[13];
+  output[13] = -step[13] + step[12];
+  output[14] = -step[14] + step[15];
+  output[15] = step[15] + step[14];
+
+  output[16] = step[16];
+  output[17] = dct_32_round(step[17] * -cospi_4_64 + step[30] * cospi_28_64);
+  output[18] = dct_32_round(step[18] * -cospi_28_64 + step[29] * -cospi_4_64);
+  output[19] = step[19];
+  output[20] = step[20];
+  output[21] = dct_32_round(step[21] * -cospi_20_64 + step[26] * cospi_12_64);
+  output[22] = dct_32_round(step[22] * -cospi_12_64 + step[25] * -cospi_20_64);
+  output[23] = step[23];
+  output[24] = step[24];
+  output[25] = dct_32_round(step[25] * cospi_12_64 + step[22] * -cospi_20_64);
+  output[26] = dct_32_round(step[26] * cospi_20_64 + step[21] * cospi_12_64);
+  output[27] = step[27];
+  output[28] = step[28];
+  output[29] = dct_32_round(step[29] * cospi_28_64 + step[18] * -cospi_4_64);
+  output[30] = dct_32_round(step[30] * cospi_4_64 + step[17] * cospi_28_64);
+  output[31] = step[31];
+
+  // Stage 7
+  step[0] = output[0];
+  step[1] = output[1];
+  step[2] = output[2];
+  step[3] = output[3];
+  step[4] = output[4];
+  step[5] = output[5];
+  step[6] = output[6];
+  step[7] = output[7];
+  step[8] = dct_32_round(output[8] * cospi_30_64 + output[15] * cospi_2_64);
+  step[9] = dct_32_round(output[9] * cospi_14_64 + output[14] * cospi_18_64);
+  step[10] = dct_32_round(output[10] * cospi_22_64 + output[13] * cospi_10_64);
+  step[11] = dct_32_round(output[11] * cospi_6_64 + output[12] * cospi_26_64);
+  step[12] = dct_32_round(output[12] * cospi_6_64 + output[11] * -cospi_26_64);
+  step[13] = dct_32_round(output[13] * cospi_22_64 + output[10] * -cospi_10_64);
+  step[14] = dct_32_round(output[14] * cospi_14_64 + output[9] * -cospi_18_64);
+  step[15] = dct_32_round(output[15] * cospi_30_64 + output[8] * -cospi_2_64);
+
+  step[16] = output[16] + output[17];
+  step[17] = -output[17] + output[16];
+  step[18] = -output[18] + output[19];
+  step[19] = output[19] + output[18];
+  step[20] = output[20] + output[21];
+  step[21] = -output[21] + output[20];
+  step[22] = -output[22] + output[23];
+  step[23] = output[23] + output[22];
+  step[24] = output[24] + output[25];
+  step[25] = -output[25] + output[24];
+  step[26] = -output[26] + output[27];
+  step[27] = output[27] + output[26];
+  step[28] = output[28] + output[29];
+  step[29] = -output[29] + output[28];
+  step[30] = -output[30] + output[31];
+  step[31] = output[31] + output[30];
+
+  // Final stage --- outputs indices are bit-reversed.
+  output[0] = step[0];
+  output[16] = step[1];
+  output[8] = step[2];
+  output[24] = step[3];
+  output[4] = step[4];
+  output[20] = step[5];
+  output[12] = step[6];
+  output[28] = step[7];
+  output[2] = step[8];
+  output[18] = step[9];
+  output[10] = step[10];
+  output[26] = step[11];
+  output[6] = step[12];
+  output[22] = step[13];
+  output[14] = step[14];
+  output[30] = step[15];
+
+  output[1] = dct_32_round(step[16] * cospi_31_64 + step[31] * cospi_1_64);
+  output[17] = dct_32_round(step[17] * cospi_15_64 + step[30] * cospi_17_64);
+  output[9] = dct_32_round(step[18] * cospi_23_64 + step[29] * cospi_9_64);
+  output[25] = dct_32_round(step[19] * cospi_7_64 + step[28] * cospi_25_64);
+  output[5] = dct_32_round(step[20] * cospi_27_64 + step[27] * cospi_5_64);
+  output[21] = dct_32_round(step[21] * cospi_11_64 + step[26] * cospi_21_64);
+  output[13] = dct_32_round(step[22] * cospi_19_64 + step[25] * cospi_13_64);
+  output[29] = dct_32_round(step[23] * cospi_3_64 + step[24] * cospi_29_64);
+  output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64);
+  output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64);
+  output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64);
+  output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64);
+  output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64);
+  output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64);
+  output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64);
+  output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
+}
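+
+// The final stage un-scrambles the butterfly ordering: intermediate value
+// step[k] is written to output[bitrev5(k)], where bitrev5 denotes 5-bit
+// bit reversal (named here only for exposition); e.g. step[1] = 00001b
+// goes to output[16] = 10000b, so the coefficients come out in natural
+// frequency order with no extra reorder pass.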
+
+void vp10_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
+  int i, j;
+  tran_high_t output[32 * 32];
+
+  // Columns
+  for (i = 0; i < 32; ++i) {
+    tran_high_t temp_in[32], temp_out[32];
+    for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4;
+    vp10_fdct32(temp_in, temp_out, 0);
+    for (j = 0; j < 32; ++j)
+      output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
+  }
+
+  // Rows
+  for (i = 0; i < 32; ++i) {
+    tran_high_t temp_in[32], temp_out[32];
+    for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32];
+    vp10_fdct32(temp_in, temp_out, 0);
+    for (j = 0; j < 32; ++j)
+      out[j + i * 32] =
+          (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
+  }
+}
+
+// Note that although dct_32_round is used in the dct32 computation flow,
+// this 2D fdct32x32, used in the rate-distortion optimization loop,
+// operates within 16-bit precision.
+void vp10_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
+  int i, j;
+  tran_high_t output[32 * 32];
+
+  // Columns
+  for (i = 0; i < 32; ++i) {
+    tran_high_t temp_in[32], temp_out[32];
+    for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4;
+    vp10_fdct32(temp_in, temp_out, 0);
+    for (j = 0; j < 32; ++j)
+      // TODO(cd): see quality impact of only doing
+      //           output[j * 32 + i] = (temp_out[j] + 1) >> 2;
+      //           PS: also change code in vp10_dsp/x86/vp10_dct_sse2.c
+      output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
+  }
+
+  // Rows
+  for (i = 0; i < 32; ++i) {
+    tran_high_t temp_in[32], temp_out[32];
+    for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32];
+    vp10_fdct32(temp_in, temp_out, 1);
+    for (j = 0; j < 32; ++j) out[j + i * 32] = (tran_low_t)temp_out[j];
+  }
+}
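+
+// Compared with vp10_fdct32x32_c, the _rd variant passes round = 1 to the
+// row pass so that vp10_fdct32 applies half_round_shift internally, and it
+// skips the final >> 2; the result is slightly coarser but stays within
+// 16 bits, which is sufficient for the RD search.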
+
+void vp10_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) {
+  int r, c;
+  tran_low_t sum = 0;
+  for (r = 0; r < 32; ++r)
+    for (c = 0; c < 32; ++c) sum += input[r * stride + c];
+
+  output[0] = sum >> 3;
+  output[1] = 0;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output,
+                           int stride) {
+  vp10_fdct4x4_c(input, output, stride);
+}
+
+void vp10_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
+                           int stride) {
+  vp10_fdct8x8_c(input, final_output, stride);
+}
+
+void vp10_highbd_fdct8x8_1_c(const int16_t *input, tran_low_t *final_output,
+                             int stride) {
+  vp10_fdct8x8_1_c(input, final_output, stride);
+}
+
+void vp10_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output,
+                             int stride) {
+  vp10_fdct16x16_c(input, output, stride);
+}
+
+void vp10_highbd_fdct16x16_1_c(const int16_t *input, tran_low_t *output,
+                               int stride) {
+  vp10_fdct16x16_1_c(input, output, stride);
+}
+
+void vp10_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out,
+                             int stride) {
+  vp10_fdct32x32_c(input, out, stride);
+}
+
+void vp10_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out,
+                                int stride) {
+  vp10_fdct32x32_rd_c(input, out, stride);
+}
+
+void vp10_highbd_fdct32x32_1_c(const int16_t *input, tran_low_t *out,
+                               int stride) {
+  vp10_fdct32x32_1_c(input, out, stride);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/vp10_fwd_txfm.h b/av1/common/vp10_fwd_txfm.h
new file mode 100644
index 0000000..a0481d3
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm.h
@@ -0,0 +1,18 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_VP10_FWD_TXFM_H_
+#define VP10_COMMON_VP10_FWD_TXFM_H_
+
+#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/fwd_txfm.h"
+
+void vp10_fdct32(const tran_high_t *input, tran_high_t *output, int round);
+#endif  // VP10_COMMON_VP10_FWD_TXFM_H_
diff --git a/av1/common/vp10_fwd_txfm1d.c b/av1/common/vp10_fwd_txfm1d.c
new file mode 100644
index 0000000..6dff077
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm1d.c
@@ -0,0 +1,1530 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>  // used by range_check()
+#include <stdio.h>   // used by range_check()
+#include <stdlib.h>
+#include "av1/common/vp10_fwd_txfm1d.h"
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#define range_check(stage, input, buf, size, bit)                         \
+  {                                                                       \
+    int i, j;                                                             \
+    for (i = 0; i < size; ++i) {                                          \
+      int buf_bit = get_max_bit(abs(buf[i])) + 1;                         \
+      if (buf_bit > bit) {                                                \
+        printf("======== %s %d overflow ========\n", __FILE__, __LINE__); \
+        printf("stage: %d node: %d\n", stage, i);                         \
+        printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \
+        printf("input:\n");                                               \
+        for (j = 0; j < size; j++) {                                      \
+          printf("%d,", input[j]);                                        \
+        }                                                                 \
+        printf("\n");                                                     \
+        assert(0);                                                        \
+      }                                                                   \
+    }                                                                     \
+  }
+#else
+#define range_check(stage, input, buf, size, bit) \
+  {                                               \
+    (void) stage;                                 \
+    (void) input;                                 \
+    (void) buf;                                   \
+    (void) size;                                  \
+    (void) bit;                                   \
+  }
+#endif
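+
+/* range_check is a no-op unless CONFIG_COEFFICIENT_RANGE_CHECKING is set;
+ * the (void) casts in the stub silence unused-argument warnings. When
+ * enabled, it dumps the whole input vector and asserts as soon as any node
+ * of the current stage exceeds the bit budget given in stage_range[]. */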
+
+void vp10_fdct4_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 4;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[4];
+
+  // stage 0;
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1;
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0] + input[3];
+  bf1[1] = input[1] + input[2];
+  bf1[2] = -input[2] + input[1];
+  bf1[3] = -input[3] + input[0];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[2];
+  bf1[2] = bf0[1];
+  bf1[3] = bf0[3];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
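+
+/* half_btf (presumably defined alongside cospi_arr in the txfm header) is
+ * the shared butterfly primitive:
+ *   half_btf(w0, in0, w1, in1, bit)
+ *     == (w0 * in0 + w1 * in1 + (1 << (bit - 1))) >> bit,
+ * i.e. one output of a 2-point rotation by the cospi weights, with
+ * cos_bit[stage] selecting the fixed-point precision of that stage. */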
+
+void vp10_fdct8_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 8;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[8];
+
+  // stage 0;
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1;
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0] + input[7];
+  bf1[1] = input[1] + input[6];
+  bf1[2] = input[2] + input[5];
+  bf1[3] = input[3] + input[4];
+  bf1[4] = -input[4] + input[3];
+  bf1[5] = -input[5] + input[2];
+  bf1[6] = -input[6] + input[1];
+  bf1[7] = -input[7] + input[0];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[3];
+  bf1[1] = bf0[1] + bf0[2];
+  bf1[2] = -bf0[2] + bf0[1];
+  bf1[3] = -bf0[3] + bf0[0];
+  bf1[4] = bf0[4];
+  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+  bf1[7] = bf0[7];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+  bf1[4] = bf0[4] + bf0[5];
+  bf1[5] = -bf0[5] + bf0[4];
+  bf1[6] = -bf0[6] + bf0[7];
+  bf1[7] = bf0[7] + bf0[6];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[4];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[6];
+  bf1[4] = bf0[1];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[3];
+  bf1[7] = bf0[7];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fdct16_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 16;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[16];
+
+  // stage 0;
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1;
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0] + input[15];
+  bf1[1] = input[1] + input[14];
+  bf1[2] = input[2] + input[13];
+  bf1[3] = input[3] + input[12];
+  bf1[4] = input[4] + input[11];
+  bf1[5] = input[5] + input[10];
+  bf1[6] = input[6] + input[9];
+  bf1[7] = input[7] + input[8];
+  bf1[8] = -input[8] + input[7];
+  bf1[9] = -input[9] + input[6];
+  bf1[10] = -input[10] + input[5];
+  bf1[11] = -input[11] + input[4];
+  bf1[12] = -input[12] + input[3];
+  bf1[13] = -input[13] + input[2];
+  bf1[14] = -input[14] + input[1];
+  bf1[15] = -input[15] + input[0];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[7];
+  bf1[1] = bf0[1] + bf0[6];
+  bf1[2] = bf0[2] + bf0[5];
+  bf1[3] = bf0[3] + bf0[4];
+  bf1[4] = -bf0[4] + bf0[3];
+  bf1[5] = -bf0[5] + bf0[2];
+  bf1[6] = -bf0[6] + bf0[1];
+  bf1[7] = -bf0[7] + bf0[0];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[3];
+  bf1[1] = bf0[1] + bf0[2];
+  bf1[2] = -bf0[2] + bf0[1];
+  bf1[3] = -bf0[3] + bf0[0];
+  bf1[4] = bf0[4];
+  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8] + bf0[11];
+  bf1[9] = bf0[9] + bf0[10];
+  bf1[10] = -bf0[10] + bf0[9];
+  bf1[11] = -bf0[11] + bf0[8];
+  bf1[12] = -bf0[12] + bf0[15];
+  bf1[13] = -bf0[13] + bf0[14];
+  bf1[14] = bf0[14] + bf0[13];
+  bf1[15] = bf0[15] + bf0[12];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+  bf1[4] = bf0[4] + bf0[5];
+  bf1[5] = -bf0[5] + bf0[4];
+  bf1[6] = -bf0[6] + bf0[7];
+  bf1[7] = bf0[7] + bf0[6];
+  bf1[8] = bf0[8];
+  bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+  bf1[15] = bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+  bf1[8] = bf0[8] + bf0[9];
+  bf1[9] = -bf0[9] + bf0[8];
+  bf1[10] = -bf0[10] + bf0[11];
+  bf1[11] = bf0[11] + bf0[10];
+  bf1[12] = bf0[12] + bf0[13];
+  bf1[13] = -bf0[13] + bf0[12];
+  bf1[14] = -bf0[14] + bf0[15];
+  bf1[15] = bf0[15] + bf0[14];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+  bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[8];
+  bf1[2] = bf0[4];
+  bf1[3] = bf0[12];
+  bf1[4] = bf0[2];
+  bf1[5] = bf0[10];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[14];
+  bf1[8] = bf0[1];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[5];
+  bf1[11] = bf0[13];
+  bf1[12] = bf0[3];
+  bf1[13] = bf0[11];
+  bf1[14] = bf0[7];
+  bf1[15] = bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fdct32_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 32;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[32];
+
+  // stage 0;
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1;
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0] + input[31];
+  bf1[1] = input[1] + input[30];
+  bf1[2] = input[2] + input[29];
+  bf1[3] = input[3] + input[28];
+  bf1[4] = input[4] + input[27];
+  bf1[5] = input[5] + input[26];
+  bf1[6] = input[6] + input[25];
+  bf1[7] = input[7] + input[24];
+  bf1[8] = input[8] + input[23];
+  bf1[9] = input[9] + input[22];
+  bf1[10] = input[10] + input[21];
+  bf1[11] = input[11] + input[20];
+  bf1[12] = input[12] + input[19];
+  bf1[13] = input[13] + input[18];
+  bf1[14] = input[14] + input[17];
+  bf1[15] = input[15] + input[16];
+  bf1[16] = -input[16] + input[15];
+  bf1[17] = -input[17] + input[14];
+  bf1[18] = -input[18] + input[13];
+  bf1[19] = -input[19] + input[12];
+  bf1[20] = -input[20] + input[11];
+  bf1[21] = -input[21] + input[10];
+  bf1[22] = -input[22] + input[9];
+  bf1[23] = -input[23] + input[8];
+  bf1[24] = -input[24] + input[7];
+  bf1[25] = -input[25] + input[6];
+  bf1[26] = -input[26] + input[5];
+  bf1[27] = -input[27] + input[4];
+  bf1[28] = -input[28] + input[3];
+  bf1[29] = -input[29] + input[2];
+  bf1[30] = -input[30] + input[1];
+  bf1[31] = -input[31] + input[0];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[15];
+  bf1[1] = bf0[1] + bf0[14];
+  bf1[2] = bf0[2] + bf0[13];
+  bf1[3] = bf0[3] + bf0[12];
+  bf1[4] = bf0[4] + bf0[11];
+  bf1[5] = bf0[5] + bf0[10];
+  bf1[6] = bf0[6] + bf0[9];
+  bf1[7] = bf0[7] + bf0[8];
+  bf1[8] = -bf0[8] + bf0[7];
+  bf1[9] = -bf0[9] + bf0[6];
+  bf1[10] = -bf0[10] + bf0[5];
+  bf1[11] = -bf0[11] + bf0[4];
+  bf1[12] = -bf0[12] + bf0[3];
+  bf1[13] = -bf0[13] + bf0[2];
+  bf1[14] = -bf0[14] + bf0[1];
+  bf1[15] = -bf0[15] + bf0[0];
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = bf0[18];
+  bf1[19] = bf0[19];
+  bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+  bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+  bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
+  bf1[28] = bf0[28];
+  bf1[29] = bf0[29];
+  bf1[30] = bf0[30];
+  bf1[31] = bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[7];
+  bf1[1] = bf0[1] + bf0[6];
+  bf1[2] = bf0[2] + bf0[5];
+  bf1[3] = bf0[3] + bf0[4];
+  bf1[4] = -bf0[4] + bf0[3];
+  bf1[5] = -bf0[5] + bf0[2];
+  bf1[6] = -bf0[6] + bf0[1];
+  bf1[7] = -bf0[7] + bf0[0];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = bf0[16] + bf0[23];
+  bf1[17] = bf0[17] + bf0[22];
+  bf1[18] = bf0[18] + bf0[21];
+  bf1[19] = bf0[19] + bf0[20];
+  bf1[20] = -bf0[20] + bf0[19];
+  bf1[21] = -bf0[21] + bf0[18];
+  bf1[22] = -bf0[22] + bf0[17];
+  bf1[23] = -bf0[23] + bf0[16];
+  bf1[24] = -bf0[24] + bf0[31];
+  bf1[25] = -bf0[25] + bf0[30];
+  bf1[26] = -bf0[26] + bf0[29];
+  bf1[27] = -bf0[27] + bf0[28];
+  bf1[28] = bf0[28] + bf0[27];
+  bf1[29] = bf0[29] + bf0[26];
+  bf1[30] = bf0[30] + bf0[25];
+  bf1[31] = bf0[31] + bf0[24];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[3];
+  bf1[1] = bf0[1] + bf0[2];
+  bf1[2] = -bf0[2] + bf0[1];
+  bf1[3] = -bf0[3] + bf0[0];
+  bf1[4] = bf0[4];
+  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8] + bf0[11];
+  bf1[9] = bf0[9] + bf0[10];
+  bf1[10] = -bf0[10] + bf0[9];
+  bf1[11] = -bf0[11] + bf0[8];
+  bf1[12] = -bf0[12] + bf0[15];
+  bf1[13] = -bf0[13] + bf0[14];
+  bf1[14] = bf0[14] + bf0[13];
+  bf1[15] = bf0[15] + bf0[12];
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+  bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+  bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+  bf1[22] = bf0[22];
+  bf1[23] = bf0[23];
+  bf1[24] = bf0[24];
+  bf1[25] = bf0[25];
+  bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
+  bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
+  bf1[30] = bf0[30];
+  bf1[31] = bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+  bf1[4] = bf0[4] + bf0[5];
+  bf1[5] = -bf0[5] + bf0[4];
+  bf1[6] = -bf0[6] + bf0[7];
+  bf1[7] = bf0[7] + bf0[6];
+  bf1[8] = bf0[8];
+  bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+  bf1[15] = bf0[15];
+  bf1[16] = bf0[16] + bf0[19];
+  bf1[17] = bf0[17] + bf0[18];
+  bf1[18] = -bf0[18] + bf0[17];
+  bf1[19] = -bf0[19] + bf0[16];
+  bf1[20] = -bf0[20] + bf0[23];
+  bf1[21] = -bf0[21] + bf0[22];
+  bf1[22] = bf0[22] + bf0[21];
+  bf1[23] = bf0[23] + bf0[20];
+  bf1[24] = bf0[24] + bf0[27];
+  bf1[25] = bf0[25] + bf0[26];
+  bf1[26] = -bf0[26] + bf0[25];
+  bf1[27] = -bf0[27] + bf0[24];
+  bf1[28] = -bf0[28] + bf0[31];
+  bf1[29] = -bf0[29] + bf0[30];
+  bf1[30] = bf0[30] + bf0[29];
+  bf1[31] = bf0[31] + bf0[28];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+  bf1[8] = bf0[8] + bf0[9];
+  bf1[9] = -bf0[9] + bf0[8];
+  bf1[10] = -bf0[10] + bf0[11];
+  bf1[11] = bf0[11] + bf0[10];
+  bf1[12] = bf0[12] + bf0[13];
+  bf1[13] = -bf0[13] + bf0[12];
+  bf1[14] = -bf0[14] + bf0[15];
+  bf1[15] = bf0[15] + bf0[14];
+  bf1[16] = bf0[16];
+  bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+  bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+  bf1[19] = bf0[19];
+  bf1[20] = bf0[20];
+  bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+  bf1[23] = bf0[23];
+  bf1[24] = bf0[24];
+  bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
+  bf1[27] = bf0[27];
+  bf1[28] = bf0[28];
+  bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
+  bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
+  bf1[31] = bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+  bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+  bf1[16] = bf0[16] + bf0[17];
+  bf1[17] = -bf0[17] + bf0[16];
+  bf1[18] = -bf0[18] + bf0[19];
+  bf1[19] = bf0[19] + bf0[18];
+  bf1[20] = bf0[20] + bf0[21];
+  bf1[21] = -bf0[21] + bf0[20];
+  bf1[22] = -bf0[22] + bf0[23];
+  bf1[23] = bf0[23] + bf0[22];
+  bf1[24] = bf0[24] + bf0[25];
+  bf1[25] = -bf0[25] + bf0[24];
+  bf1[26] = -bf0[26] + bf0[27];
+  bf1[27] = bf0[27] + bf0[26];
+  bf1[28] = bf0[28] + bf0[29];
+  bf1[29] = -bf0[29] + bf0[28];
+  bf1[30] = -bf0[30] + bf0[31];
+  bf1[31] = bf0[31] + bf0[30];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 8
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
+  bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
+  bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
+  bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
+  bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
+  bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
+  bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
+  bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
+  bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 9
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[16];
+  bf1[2] = bf0[8];
+  bf1[3] = bf0[24];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[20];
+  bf1[6] = bf0[12];
+  bf1[7] = bf0[28];
+  bf1[8] = bf0[2];
+  bf1[9] = bf0[18];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[26];
+  bf1[12] = bf0[6];
+  bf1[13] = bf0[22];
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[30];
+  bf1[16] = bf0[1];
+  bf1[17] = bf0[17];
+  bf1[18] = bf0[9];
+  bf1[19] = bf0[25];
+  bf1[20] = bf0[5];
+  bf1[21] = bf0[21];
+  bf1[22] = bf0[13];
+  bf1[23] = bf0[29];
+  bf1[24] = bf0[3];
+  bf1[25] = bf0[19];
+  bf1[26] = bf0[11];
+  bf1[27] = bf0[27];
+  bf1[28] = bf0[7];
+  bf1[29] = bf0[23];
+  bf1[30] = bf0[15];
+  bf1[31] = bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
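All of the rotation stages funnel through half_btf(), whose definition lives in av1/common/vp10_txfm.h and is not part of this hunk. Under the fixed-point convention used here it should amount to a weighted sum of two inputs followed by a rounding right shift; a minimal sketch, assuming that convention:

#include <stdint.h>

/* Sketch of the half-butterfly primitive: w0 and w1 are cospi weights in
   Q`bit` fixed point, so the sum of products is shifted back down by
   `bit` with rounding. The real definition is in vp10_txfm.h. */
static int32_t half_btf_sketch(int32_t w0, int32_t in0, int32_t w1,
                               int32_t in1, int bit) {
  const int64_t sum = (int64_t)w0 * in0 + (int64_t)w1 * in1;
  return (int32_t)((sum + ((int64_t)1 << (bit - 1))) >> bit);
}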
+
+void vp10_fadst4_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 4;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[4];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[3];
+  bf1[1] = input[0];
+  bf1[2] = input[1];
+  bf1[3] = input[2];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(-cospi[8], bf0[1], cospi[56], bf0[0], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(-cospi[40], bf0[3], cospi[24], bf0[2], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[2];
+  bf1[1] = bf0[1] + bf0[3];
+  bf1[2] = -bf0[2] + bf0[0];
+  bf1[3] = -bf0[3] + bf0[1];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = -bf0[2];
+  bf1[2] = bf0[3];
+  bf1[3] = -bf0[1];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
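Each rotation stage re-derives its weight table via cospi = cospi_arr[cos_bit[stage] - cos_bit_min]. Judging by how the indices pair up (cospi[8] with cospi[56], cospi[16] with cospi[48], always summing to 64), the row selected for a stage with cos_bit b presumably holds cos(j * PI / 128) in Qb, so cospi[32] is cos(PI/4) and cospi[64 - j] plays the role of sin(j * PI / 128). A hypothetical generator for one row, under that assumption:

#include <math.h>
#include <stdint.h>

/* Hypothetical generator for one cospi_arr row at bit width `bit`:
   64 entries of round(cos(j * PI / 128) * 2^bit). */
static void gen_cospi_row(int32_t *row, int bit) {
  int j;
  for (j = 0; j < 64; ++j)
    row[j] = (int32_t)floor(cos(j * M_PI / 128) * (1 << bit) + 0.5);
}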
+
+void vp10_fadst8_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 8;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[8];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[7];
+  bf1[1] = input[0];
+  bf1[2] = input[5];
+  bf1[3] = input[2];
+  bf1[4] = input[3];
+  bf1[5] = input[4];
+  bf1[6] = input[1];
+  bf1[7] = input[6];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(-cospi[4], bf0[1], cospi[60], bf0[0], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(-cospi[20], bf0[3], cospi[44], bf0[2], cos_bit[stage]);
+  bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(-cospi[36], bf0[5], cospi[28], bf0[4], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(-cospi[52], bf0[7], cospi[12], bf0[6], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[4];
+  bf1[1] = bf0[1] + bf0[5];
+  bf1[2] = bf0[2] + bf0[6];
+  bf1[3] = bf0[3] + bf0[7];
+  bf1[4] = -bf0[4] + bf0[0];
+  bf1[5] = -bf0[5] + bf0[1];
+  bf1[6] = -bf0[6] + bf0[2];
+  bf1[7] = -bf0[7] + bf0[3];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[2];
+  bf1[1] = bf0[1] + bf0[3];
+  bf1[2] = -bf0[2] + bf0[0];
+  bf1[3] = -bf0[3] + bf0[1];
+  bf1[4] = bf0[4] + bf0[6];
+  bf1[5] = bf0[5] + bf0[7];
+  bf1[6] = -bf0[6] + bf0[4];
+  bf1[7] = -bf0[7] + bf0[5];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = -bf0[4];
+  bf1[2] = bf0[6];
+  bf1[3] = -bf0[2];
+  bf1[4] = bf0[3];
+  bf1[5] = -bf0[7];
+  bf1[6] = bf0[5];
+  bf1[7] = -bf0[1];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
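Every stage ends with range_check(stage, input, bf1, size, stage_range[stage]); its definition is outside this hunk. The intent is to verify that intermediates still fit inside the signed bit width budgeted by the config's stage_range arrays. A plausible debug-build equivalent, hedged since the real helper may report rather than assert:

#include <assert.h>
#include <stdint.h>

/* Plausible equivalent of range_check(): every value in buf must fit in
   a signed `bit`-bit integer. */
static void range_check_sketch(int32_t stage, const int32_t *input,
                               const int32_t *buf, int32_t size,
                               int8_t bit) {
  int32_t i;
  (void)stage;
  (void)input;
  for (i = 0; i < size; ++i)
    assert(buf[i] >= -(1 << (bit - 1)) && buf[i] < (1 << (bit - 1)));
}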
+
+void vp10_fadst16_new(const int32_t *input, int32_t *output,
+                      const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 16;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[16];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[15];
+  bf1[1] = input[0];
+  bf1[2] = input[13];
+  bf1[3] = input[2];
+  bf1[4] = input[11];
+  bf1[5] = input[4];
+  bf1[6] = input[9];
+  bf1[7] = input[6];
+  bf1[8] = input[7];
+  bf1[9] = input[8];
+  bf1[10] = input[5];
+  bf1[11] = input[10];
+  bf1[12] = input[3];
+  bf1[13] = input[12];
+  bf1[14] = input[1];
+  bf1[15] = input[14];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(-cospi[2], bf0[1], cospi[62], bf0[0], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(-cospi[10], bf0[3], cospi[54], bf0[2], cos_bit[stage]);
+  bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(-cospi[18], bf0[5], cospi[46], bf0[4], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(-cospi[26], bf0[7], cospi[38], bf0[6], cos_bit[stage]);
+  bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
+  bf1[9] = half_btf(-cospi[34], bf0[9], cospi[30], bf0[8], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[42], bf0[11], cospi[22], bf0[10], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(-cospi[50], bf0[13], cospi[14], bf0[12], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(-cospi[58], bf0[15], cospi[6], bf0[14], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[8];
+  bf1[1] = bf0[1] + bf0[9];
+  bf1[2] = bf0[2] + bf0[10];
+  bf1[3] = bf0[3] + bf0[11];
+  bf1[4] = bf0[4] + bf0[12];
+  bf1[5] = bf0[5] + bf0[13];
+  bf1[6] = bf0[6] + bf0[14];
+  bf1[7] = bf0[7] + bf0[15];
+  bf1[8] = -bf0[8] + bf0[0];
+  bf1[9] = -bf0[9] + bf0[1];
+  bf1[10] = -bf0[10] + bf0[2];
+  bf1[11] = -bf0[11] + bf0[3];
+  bf1[12] = -bf0[12] + bf0[4];
+  bf1[13] = -bf0[13] + bf0[5];
+  bf1[14] = -bf0[14] + bf0[6];
+  bf1[15] = -bf0[15] + bf0[7];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+  bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
+  bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
+  bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[4];
+  bf1[1] = bf0[1] + bf0[5];
+  bf1[2] = bf0[2] + bf0[6];
+  bf1[3] = bf0[3] + bf0[7];
+  bf1[4] = -bf0[4] + bf0[0];
+  bf1[5] = -bf0[5] + bf0[1];
+  bf1[6] = -bf0[6] + bf0[2];
+  bf1[7] = -bf0[7] + bf0[3];
+  bf1[8] = bf0[8] + bf0[12];
+  bf1[9] = bf0[9] + bf0[13];
+  bf1[10] = bf0[10] + bf0[14];
+  bf1[11] = bf0[11] + bf0[15];
+  bf1[12] = -bf0[12] + bf0[8];
+  bf1[13] = -bf0[13] + bf0[9];
+  bf1[14] = -bf0[14] + bf0[10];
+  bf1[15] = -bf0[15] + bf0[11];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
+  bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[2];
+  bf1[1] = bf0[1] + bf0[3];
+  bf1[2] = -bf0[2] + bf0[0];
+  bf1[3] = -bf0[3] + bf0[1];
+  bf1[4] = bf0[4] + bf0[6];
+  bf1[5] = bf0[5] + bf0[7];
+  bf1[6] = -bf0[6] + bf0[4];
+  bf1[7] = -bf0[7] + bf0[5];
+  bf1[8] = bf0[8] + bf0[10];
+  bf1[9] = bf0[9] + bf0[11];
+  bf1[10] = -bf0[10] + bf0[8];
+  bf1[11] = -bf0[11] + bf0[9];
+  bf1[12] = bf0[12] + bf0[14];
+  bf1[13] = bf0[13] + bf0[15];
+  bf1[14] = -bf0[14] + bf0[12];
+  bf1[15] = -bf0[15] + bf0[13];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 8
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 9
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = -bf0[8];
+  bf1[2] = bf0[12];
+  bf1[3] = -bf0[4];
+  bf1[4] = bf0[6];
+  bf1[5] = -bf0[14];
+  bf1[6] = bf0[10];
+  bf1[7] = -bf0[2];
+  bf1[8] = bf0[3];
+  bf1[9] = -bf0[11];
+  bf1[10] = bf0[15];
+  bf1[11] = -bf0[7];
+  bf1[12] = bf0[5];
+  bf1[13] = -bf0[13];
+  bf1[14] = bf0[9];
+  bf1[15] = -bf0[1];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
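A structural note that applies to every 1-D transform in this file: stage 1 permutes input into output, and each later stage alternates between output and the on-stack step buffer, so one scratch array of the transform size is all that is needed. The stage counts are always odd, which guarantees the final stage writes output. A minimal runnable illustration of the skeleton, with the butterflies reduced to a copy:

#include <stdint.h>

/* Ping-pong skeleton shared by the 1-D transforms above; `nstage` is
   odd in every configuration here, so the last write lands in output. */
static void txfm1d_skeleton(const int32_t *input, int32_t *output,
                            int32_t size, int32_t nstage) {
  int32_t step[32];
  int32_t s, i;
  for (i = 0; i < size; ++i) output[i] = input[i]; /* stage 1: permute */
  for (s = 2; s <= nstage; ++s) {
    int32_t *bf0 = (s % 2 == 0) ? output : step; /* stage source      */
    int32_t *bf1 = (s % 2 == 0) ? step : output; /* stage destination */
    for (i = 0; i < size; ++i) bf1[i] = bf0[i];  /* butterflies here  */
  }
}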
+
+void vp10_fadst32_new(const int32_t *input, int32_t *output,
+                      const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 32;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[32];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[31];
+  bf1[1] = input[0];
+  bf1[2] = input[29];
+  bf1[3] = input[2];
+  bf1[4] = input[27];
+  bf1[5] = input[4];
+  bf1[6] = input[25];
+  bf1[7] = input[6];
+  bf1[8] = input[23];
+  bf1[9] = input[8];
+  bf1[10] = input[21];
+  bf1[11] = input[10];
+  bf1[12] = input[19];
+  bf1[13] = input[12];
+  bf1[14] = input[17];
+  bf1[15] = input[14];
+  bf1[16] = input[15];
+  bf1[17] = input[16];
+  bf1[18] = input[13];
+  bf1[19] = input[18];
+  bf1[20] = input[11];
+  bf1[21] = input[20];
+  bf1[22] = input[9];
+  bf1[23] = input[22];
+  bf1[24] = input[7];
+  bf1[25] = input[24];
+  bf1[26] = input[5];
+  bf1[27] = input[26];
+  bf1[28] = input[3];
+  bf1[29] = input[28];
+  bf1[30] = input[1];
+  bf1[31] = input[30];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(-cospi[1], bf0[1], cospi[63], bf0[0], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(-cospi[5], bf0[3], cospi[59], bf0[2], cos_bit[stage]);
+  bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(-cospi[9], bf0[5], cospi[55], bf0[4], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(-cospi[13], bf0[7], cospi[51], bf0[6], cos_bit[stage]);
+  bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
+  bf1[9] = half_btf(-cospi[17], bf0[9], cospi[47], bf0[8], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[21], bf0[11], cospi[43], bf0[10], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(-cospi[25], bf0[13], cospi[39], bf0[12], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(-cospi[29], bf0[15], cospi[35], bf0[14], cos_bit[stage]);
+  bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
+  bf1[17] = half_btf(-cospi[33], bf0[17], cospi[31], bf0[16], cos_bit[stage]);
+  bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
+  bf1[19] = half_btf(-cospi[37], bf0[19], cospi[27], bf0[18], cos_bit[stage]);
+  bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
+  bf1[21] = half_btf(-cospi[41], bf0[21], cospi[23], bf0[20], cos_bit[stage]);
+  bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
+  bf1[23] = half_btf(-cospi[45], bf0[23], cospi[19], bf0[22], cos_bit[stage]);
+  bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
+  bf1[25] = half_btf(-cospi[49], bf0[25], cospi[15], bf0[24], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
+  bf1[27] = half_btf(-cospi[53], bf0[27], cospi[11], bf0[26], cos_bit[stage]);
+  bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
+  bf1[29] = half_btf(-cospi[57], bf0[29], cospi[7], bf0[28], cos_bit[stage]);
+  bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
+  bf1[31] = half_btf(-cospi[61], bf0[31], cospi[3], bf0[30], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[16];
+  bf1[1] = bf0[1] + bf0[17];
+  bf1[2] = bf0[2] + bf0[18];
+  bf1[3] = bf0[3] + bf0[19];
+  bf1[4] = bf0[4] + bf0[20];
+  bf1[5] = bf0[5] + bf0[21];
+  bf1[6] = bf0[6] + bf0[22];
+  bf1[7] = bf0[7] + bf0[23];
+  bf1[8] = bf0[8] + bf0[24];
+  bf1[9] = bf0[9] + bf0[25];
+  bf1[10] = bf0[10] + bf0[26];
+  bf1[11] = bf0[11] + bf0[27];
+  bf1[12] = bf0[12] + bf0[28];
+  bf1[13] = bf0[13] + bf0[29];
+  bf1[14] = bf0[14] + bf0[30];
+  bf1[15] = bf0[15] + bf0[31];
+  bf1[16] = -bf0[16] + bf0[0];
+  bf1[17] = -bf0[17] + bf0[1];
+  bf1[18] = -bf0[18] + bf0[2];
+  bf1[19] = -bf0[19] + bf0[3];
+  bf1[20] = -bf0[20] + bf0[4];
+  bf1[21] = -bf0[21] + bf0[5];
+  bf1[22] = -bf0[22] + bf0[6];
+  bf1[23] = -bf0[23] + bf0[7];
+  bf1[24] = -bf0[24] + bf0[8];
+  bf1[25] = -bf0[25] + bf0[9];
+  bf1[26] = -bf0[26] + bf0[10];
+  bf1[27] = -bf0[27] + bf0[11];
+  bf1[28] = -bf0[28] + bf0[12];
+  bf1[29] = -bf0[29] + bf0[13];
+  bf1[30] = -bf0[30] + bf0[14];
+  bf1[31] = -bf0[31] + bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
+  bf1[17] = half_btf(-cospi[4], bf0[17], cospi[60], bf0[16], cos_bit[stage]);
+  bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
+  bf1[19] = half_btf(-cospi[20], bf0[19], cospi[44], bf0[18], cos_bit[stage]);
+  bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
+  bf1[21] = half_btf(-cospi[36], bf0[21], cospi[28], bf0[20], cos_bit[stage]);
+  bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
+  bf1[23] = half_btf(-cospi[52], bf0[23], cospi[12], bf0[22], cos_bit[stage]);
+  bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[60], bf0[25], cospi[4], bf0[24], cos_bit[stage]);
+  bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[44], bf0[27], cospi[20], bf0[26], cos_bit[stage]);
+  bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[28], bf0[29], cospi[36], bf0[28], cos_bit[stage]);
+  bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[12], bf0[31], cospi[52], bf0[30], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[8];
+  bf1[1] = bf0[1] + bf0[9];
+  bf1[2] = bf0[2] + bf0[10];
+  bf1[3] = bf0[3] + bf0[11];
+  bf1[4] = bf0[4] + bf0[12];
+  bf1[5] = bf0[5] + bf0[13];
+  bf1[6] = bf0[6] + bf0[14];
+  bf1[7] = bf0[7] + bf0[15];
+  bf1[8] = -bf0[8] + bf0[0];
+  bf1[9] = -bf0[9] + bf0[1];
+  bf1[10] = -bf0[10] + bf0[2];
+  bf1[11] = -bf0[11] + bf0[3];
+  bf1[12] = -bf0[12] + bf0[4];
+  bf1[13] = -bf0[13] + bf0[5];
+  bf1[14] = -bf0[14] + bf0[6];
+  bf1[15] = -bf0[15] + bf0[7];
+  bf1[16] = bf0[16] + bf0[24];
+  bf1[17] = bf0[17] + bf0[25];
+  bf1[18] = bf0[18] + bf0[26];
+  bf1[19] = bf0[19] + bf0[27];
+  bf1[20] = bf0[20] + bf0[28];
+  bf1[21] = bf0[21] + bf0[29];
+  bf1[22] = bf0[22] + bf0[30];
+  bf1[23] = bf0[23] + bf0[31];
+  bf1[24] = -bf0[24] + bf0[16];
+  bf1[25] = -bf0[25] + bf0[17];
+  bf1[26] = -bf0[26] + bf0[18];
+  bf1[27] = -bf0[27] + bf0[19];
+  bf1[28] = -bf0[28] + bf0[20];
+  bf1[29] = -bf0[29] + bf0[21];
+  bf1[30] = -bf0[30] + bf0[22];
+  bf1[31] = -bf0[31] + bf0[23];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+  bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
+  bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
+  bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = bf0[18];
+  bf1[19] = bf0[19];
+  bf1[20] = bf0[20];
+  bf1[21] = bf0[21];
+  bf1[22] = bf0[22];
+  bf1[23] = bf0[23];
+  bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
+  bf1[25] = half_btf(-cospi[8], bf0[25], cospi[56], bf0[24], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
+  bf1[27] = half_btf(-cospi[40], bf0[27], cospi[24], bf0[26], cos_bit[stage]);
+  bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[56], bf0[29], cospi[8], bf0[28], cos_bit[stage]);
+  bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[24], bf0[31], cospi[40], bf0[30], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[4];
+  bf1[1] = bf0[1] + bf0[5];
+  bf1[2] = bf0[2] + bf0[6];
+  bf1[3] = bf0[3] + bf0[7];
+  bf1[4] = -bf0[4] + bf0[0];
+  bf1[5] = -bf0[5] + bf0[1];
+  bf1[6] = -bf0[6] + bf0[2];
+  bf1[7] = -bf0[7] + bf0[3];
+  bf1[8] = bf0[8] + bf0[12];
+  bf1[9] = bf0[9] + bf0[13];
+  bf1[10] = bf0[10] + bf0[14];
+  bf1[11] = bf0[11] + bf0[15];
+  bf1[12] = -bf0[12] + bf0[8];
+  bf1[13] = -bf0[13] + bf0[9];
+  bf1[14] = -bf0[14] + bf0[10];
+  bf1[15] = -bf0[15] + bf0[11];
+  bf1[16] = bf0[16] + bf0[20];
+  bf1[17] = bf0[17] + bf0[21];
+  bf1[18] = bf0[18] + bf0[22];
+  bf1[19] = bf0[19] + bf0[23];
+  bf1[20] = -bf0[20] + bf0[16];
+  bf1[21] = -bf0[21] + bf0[17];
+  bf1[22] = -bf0[22] + bf0[18];
+  bf1[23] = -bf0[23] + bf0[19];
+  bf1[24] = bf0[24] + bf0[28];
+  bf1[25] = bf0[25] + bf0[29];
+  bf1[26] = bf0[26] + bf0[30];
+  bf1[27] = bf0[27] + bf0[31];
+  bf1[28] = -bf0[28] + bf0[24];
+  bf1[29] = -bf0[29] + bf0[25];
+  bf1[30] = -bf0[30] + bf0[26];
+  bf1[31] = -bf0[31] + bf0[27];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 8
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
+  bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = bf0[18];
+  bf1[19] = bf0[19];
+  bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
+  bf1[21] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[20], cos_bit[stage]);
+  bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
+  bf1[23] = half_btf(cospi[48], bf0[23], cospi[16], bf0[22], cos_bit[stage]);
+  bf1[24] = bf0[24];
+  bf1[25] = bf0[25];
+  bf1[26] = bf0[26];
+  bf1[27] = bf0[27];
+  bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
+  bf1[29] = half_btf(-cospi[16], bf0[29], cospi[48], bf0[28], cos_bit[stage]);
+  bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[48], bf0[31], cospi[16], bf0[30], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 9
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[2];
+  bf1[1] = bf0[1] + bf0[3];
+  bf1[2] = -bf0[2] + bf0[0];
+  bf1[3] = -bf0[3] + bf0[1];
+  bf1[4] = bf0[4] + bf0[6];
+  bf1[5] = bf0[5] + bf0[7];
+  bf1[6] = -bf0[6] + bf0[4];
+  bf1[7] = -bf0[7] + bf0[5];
+  bf1[8] = bf0[8] + bf0[10];
+  bf1[9] = bf0[9] + bf0[11];
+  bf1[10] = -bf0[10] + bf0[8];
+  bf1[11] = -bf0[11] + bf0[9];
+  bf1[12] = bf0[12] + bf0[14];
+  bf1[13] = bf0[13] + bf0[15];
+  bf1[14] = -bf0[14] + bf0[12];
+  bf1[15] = -bf0[15] + bf0[13];
+  bf1[16] = bf0[16] + bf0[18];
+  bf1[17] = bf0[17] + bf0[19];
+  bf1[18] = -bf0[18] + bf0[16];
+  bf1[19] = -bf0[19] + bf0[17];
+  bf1[20] = bf0[20] + bf0[22];
+  bf1[21] = bf0[21] + bf0[23];
+  bf1[22] = -bf0[22] + bf0[20];
+  bf1[23] = -bf0[23] + bf0[21];
+  bf1[24] = bf0[24] + bf0[26];
+  bf1[25] = bf0[25] + bf0[27];
+  bf1[26] = -bf0[26] + bf0[24];
+  bf1[27] = -bf0[27] + bf0[25];
+  bf1[28] = bf0[28] + bf0[30];
+  bf1[29] = bf0[29] + bf0[31];
+  bf1[30] = -bf0[30] + bf0[28];
+  bf1[31] = -bf0[31] + bf0[29];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 10
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
+  bf1[19] = half_btf(-cospi[32], bf0[19], cospi[32], bf0[18], cos_bit[stage]);
+  bf1[20] = bf0[20];
+  bf1[21] = bf0[21];
+  bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
+  bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[22], cos_bit[stage]);
+  bf1[24] = bf0[24];
+  bf1[25] = bf0[25];
+  bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
+  bf1[27] = half_btf(-cospi[32], bf0[27], cospi[32], bf0[26], cos_bit[stage]);
+  bf1[28] = bf0[28];
+  bf1[29] = bf0[29];
+  bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
+  bf1[31] = half_btf(-cospi[32], bf0[31], cospi[32], bf0[30], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 11
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = -bf0[16];
+  bf1[2] = bf0[24];
+  bf1[3] = -bf0[8];
+  bf1[4] = bf0[12];
+  bf1[5] = -bf0[28];
+  bf1[6] = bf0[20];
+  bf1[7] = -bf0[4];
+  bf1[8] = bf0[6];
+  bf1[9] = -bf0[22];
+  bf1[10] = bf0[30];
+  bf1[11] = -bf0[14];
+  bf1[12] = bf0[10];
+  bf1[13] = -bf0[26];
+  bf1[14] = bf0[18];
+  bf1[15] = -bf0[2];
+  bf1[16] = bf0[3];
+  bf1[17] = -bf0[19];
+  bf1[18] = bf0[27];
+  bf1[19] = -bf0[11];
+  bf1[20] = bf0[15];
+  bf1[21] = -bf0[31];
+  bf1[22] = bf0[23];
+  bf1[23] = -bf0[7];
+  bf1[24] = bf0[5];
+  bf1[25] = -bf0[21];
+  bf1[26] = bf0[29];
+  bf1[27] = -bf0[13];
+  bf1[28] = bf0[9];
+  bf1[29] = -bf0[25];
+  bf1[30] = bf0[17];
+  bf1[31] = -bf0[1];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
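One more pattern worth naming: the fadst stage-1 shuffles above (for N = 4, 8, 16 and 32 alike) all follow bf1[2k] = input[N - 1 - 2k] and bf1[2k + 1] = input[2k], pairing each even slot with its mirrored odd input before the stage-2 rotations. The unrolled assignments are equivalent to this loop:

#include <stdint.h>

/* The fadst stage-1 permutation as a loop; matches the unrolled
   assignments in vp10_fadst{4,8,16,32}_new. */
static void fadst_stage1_sketch(const int32_t *input, int32_t *bf1,
                                int n) {
  int k;
  for (k = 0; k < n / 2; ++k) {
    bf1[2 * k] = input[n - 1 - 2 * k];
    bf1[2 * k + 1] = input[2 * k];
  }
}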
diff --git a/av1/common/vp10_fwd_txfm1d.h b/av1/common/vp10_fwd_txfm1d.h
new file mode 100644
index 0000000..ab9d2ee
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm1d.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_FWD_TXFM1D_H_
+#define VP10_FWD_TXFM1D_H_
+
+#include "av1/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_fdct4_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct8_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct16_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct32_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct64_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_fadst4_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst8_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst16_new(const int32_t *input, int32_t *output,
+                      const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst32_new(const int32_t *input, int32_t *output,
+                      const int8_t *cos_bit, const int8_t *stage_range);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // VP10_FWD_TXFM1D_H_
diff --git a/av1/common/vp10_fwd_txfm2d.c b/av1/common/vp10_fwd_txfm2d.c
new file mode 100644
index 0000000..85c6b68
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm2d.c
@@ -0,0 +1,177 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vp10_rtcd.h"
+#include "av1/common/enums.h"
+#include "av1/common/vp10_fwd_txfm1d.h"
+#include "av1/common/vp10_fwd_txfm2d_cfg.h"
+#include "av1/common/vp10_txfm.h"
+
+static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
+  switch (txfm_type) {
+    case TXFM_TYPE_DCT4: return vp10_fdct4_new;
+    case TXFM_TYPE_DCT8: return vp10_fdct8_new;
+    case TXFM_TYPE_DCT16: return vp10_fdct16_new;
+    case TXFM_TYPE_DCT32: return vp10_fdct32_new;
+    case TXFM_TYPE_ADST4: return vp10_fadst4_new;
+    case TXFM_TYPE_ADST8: return vp10_fadst8_new;
+    case TXFM_TYPE_ADST16: return vp10_fadst16_new;
+    case TXFM_TYPE_ADST32: return vp10_fadst32_new;
+    default: assert(0); return NULL;
+  }
+}
+
+static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
+                                const int stride, const TXFM_2D_FLIP_CFG *cfg,
+                                int32_t *buf) {
+  int c, r;
+  const int txfm_size = cfg->cfg->txfm_size;
+  const int8_t *shift = cfg->cfg->shift;
+  const int8_t *stage_range_col = cfg->cfg->stage_range_col;
+  const int8_t *stage_range_row = cfg->cfg->stage_range_row;
+  const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;
+  const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;
+  const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->cfg->txfm_type_col);
+  const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->cfg->txfm_type_row);
+
+  // use output buffer as temp buffer
+  int32_t *temp_in = output;
+  int32_t *temp_out = output + txfm_size;
+
+  // Columns
+  for (c = 0; c < txfm_size; ++c) {
+    if (cfg->ud_flip == 0) {
+      for (r = 0; r < txfm_size; ++r) temp_in[r] = input[r * stride + c];
+    } else {
+      for (r = 0; r < txfm_size; ++r)
+        // flip upside down
+        temp_in[r] = input[(txfm_size - r - 1) * stride + c];
+    }
+    round_shift_array(temp_in, txfm_size, -shift[0]);
+    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
+    round_shift_array(temp_out, txfm_size, -shift[1]);
+    if (cfg->lr_flip == 0) {
+      for (r = 0; r < txfm_size; ++r) buf[r * txfm_size + c] = temp_out[r];
+    } else {
+      for (r = 0; r < txfm_size; ++r)
+        // flip from left to right
+        buf[r * txfm_size + (txfm_size - c - 1)] = temp_out[r];
+    }
+  }
+
+  // Rows
+  for (r = 0; r < txfm_size; ++r) {
+    txfm_func_row(buf + r * txfm_size, output + r * txfm_size, cos_bit_row,
+                  stage_range_row);
+    round_shift_array(output + r * txfm_size, txfm_size, -shift[2]);
+  }
+}
+
+void vp10_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride,
+                           int tx_type, int bd) {
+  int32_t txfm_buf[4 * 4];
+  TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_4X4);
+  (void)bd;
+  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+}
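For orientation, a hypothetical caller of the 4x4 wrapper: fwd_txfm2d_c runs the column transform first (honoring ud_flip/lr_flip), stores the intermediate in the on-stack txfm_buf, then runs the row transform into output; bd is accepted for signature compatibility but unused in these C paths.

/* Hypothetical usage of the 4x4 forward transform; DCT_DCT is the
   tx_type enumerator from av1/common/enums.h. */
static void example_fwd_txfm2d_4x4(void) {
  int16_t residual[4 * 4] = { 0 };
  int32_t coeff[4 * 4];
  residual[0] = 64; /* impulse input */
  vp10_fwd_txfm2d_4x4_c(residual, coeff, /*stride=*/4, DCT_DCT, /*bd=*/10);
}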
+
+void vp10_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride,
+                           int tx_type, int bd) {
+  int32_t txfm_buf[8 * 8];
+  TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_8X8);
+  (void)bd;
+  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride,
+                             int tx_type, int bd) {
+  int32_t txfm_buf[16 * 16];
+  TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_16X16);
+  (void)bd;
+  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
+                             int tx_type, int bd) {
+  int32_t txfm_buf[32 * 32];
+  TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_32X32);
+  (void)bd;
+  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
+                             int tx_type, int bd) {
+  int32_t txfm_buf[64 * 64];
+  TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_64x64_cfg(tx_type);
+  (void)bd;
+  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+}
+
+#if CONFIG_EXT_TX
+static const TXFM_2D_CFG *fwd_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = {
+  { &fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_dct_8,
+    &fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_dct_32 },
+  { &fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
+    &fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },
+  { &fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
+    &fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },
+  { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+    &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
+  { &fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
+    &fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },
+  { &fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
+    &fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },
+  { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+    &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
+  { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+    &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
+  { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+    &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
+};
+#else   // CONFIG_EXT_TX
+static const TXFM_2D_CFG *fwd_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
+  { &fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_dct_8,
+    &fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_dct_32 },
+  { &fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
+    &fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },
+  { &fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
+    &fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },
+  { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+    &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },
+};
+#endif  // CONFIG_EXT_TX
+
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_cfg(int tx_type, int tx_size) {
+  TXFM_2D_FLIP_CFG cfg;
+  set_flip_cfg(tx_type, &cfg);
+  cfg.cfg = fwd_txfm_cfg_ls[tx_type][tx_size];
+  return cfg;
+}
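Note how the CONFIG_EXT_TX rows above reuse the plain ADST configs for the FLIPADST types; the mirroring itself travels in cfg.ud_flip and cfg.lr_flip, which set_flip_cfg() (declared in av1/common/vp10_txfm.h, outside this hunk) is expected to fill in. A sketch of the mapping it presumably performs:

#include <assert.h>

/* Presumed behavior of set_flip_cfg(): flip types keep the ADST kernels
   and record the mirroring consumed by fwd_txfm2d_c(). */
static void set_flip_cfg_sketch(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: cfg->ud_flip = 0; cfg->lr_flip = 0; break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT: cfg->ud_flip = 1; cfg->lr_flip = 0; break;
    case DCT_FLIPADST: cfg->ud_flip = 0; cfg->lr_flip = 1; break;
    case FLIPADST_FLIPADST: cfg->ud_flip = 1; cfg->lr_flip = 1; break;
    case ADST_FLIPADST: cfg->ud_flip = 0; cfg->lr_flip = 1; break;
    case FLIPADST_ADST: cfg->ud_flip = 1; cfg->lr_flip = 0; break;
#endif  // CONFIG_EXT_TX
    default: assert(0);
  }
}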
+
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_64x64_cfg(int tx_type) {
+  TXFM_2D_FLIP_CFG cfg;
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg.cfg = &fwd_txfm_2d_cfg_dct_dct_64;
+      cfg.ud_flip = 0;
+      cfg.lr_flip = 0;
+      break;
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    default:
+      cfg.cfg = NULL;  // only DCT_DCT has a 64x64 config
+      cfg.ud_flip = 0;
+      cfg.lr_flip = 0;
+      assert(0);
+  }
+  return cfg;
+}
diff --git a/av1/common/vp10_fwd_txfm2d_cfg.h b/av1/common/vp10_fwd_txfm2d_cfg.h
new file mode 100644
index 0000000..f780b87
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm2d_cfg.h
@@ -0,0 +1,443 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_FWD_TXFM2D_CFG_H_
+#define VP10_FWD_TXFM2D_CFG_H_
+#include "av1/common/enums.h"
+#include "av1/common/vp10_fwd_txfm1d.h"
+//  ---------------- config fwd_dct_dct_4 ----------------
+static const int8_t fwd_shift_dct_dct_4[3] = { 2, 0, 0 };
+static const int8_t fwd_stage_range_col_dct_dct_4[4] = { 15, 16, 17, 17 };
+static const int8_t fwd_stage_range_row_dct_dct_4[4] = { 17, 18, 18, 18 };
+static const int8_t fwd_cos_bit_col_dct_dct_4[4] = { 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_dct_4[4] = { 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_4 = {
+  4,  // .txfm_size
+  4,  // .stage_num_col
+  4,  // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_dct_dct_4,            // .shift
+  fwd_stage_range_col_dct_dct_4,  // .stage_range_col
+  fwd_stage_range_row_dct_dct_4,  // .stage_range_row
+  fwd_cos_bit_col_dct_dct_4,      // .cos_bit_col
+  fwd_cos_bit_row_dct_dct_4,      // .cos_bit_row
+  TXFM_TYPE_DCT4,                 // .txfm_type_col
+  TXFM_TYPE_DCT4                  // .txfm_type_row
+};
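To read the numbers in these configs: stage_range[i] is the worst-case signed bit width after stage i of the corresponding 1-D transform, and cos_bit is the Q format of the cospi weights per stage. A worked pass through fwd_txfm_2d_cfg_dct_dct_4, derived from the values above:

/* - shift[0] = 2 scales inputs up, and stage_range_col[0] = 15, so the
     budget is 13-bit signed residuals before the pre-shift.
   - stage 1 (add/sub butterflies) can gain one bit: 15 -> 16.
   - stage 2 (half_btf rotations) can also gain one, since
     |cos(t)*a + sin(t)*b| <= sqrt(2) * max(|a|, |b|): 16 -> 17.
   - stage 3 only reorders, so the width stays at 17.
   With cos_bit = 13, the stage-2 products stay below 2^29 in magnitude,
   comfortably inside 32-bit arithmetic. */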
+
+//  ---------------- config fwd_dct_dct_8 ----------------
+static const int8_t fwd_shift_dct_dct_8[3] = { 2, -1, 0 };
+static const int8_t fwd_stage_range_col_dct_dct_8[6] = {
+  15, 16, 17, 18, 18, 18
+};
+static const int8_t fwd_stage_range_row_dct_dct_8[6] = {
+  17, 18, 19, 19, 19, 19
+};
+static const int8_t fwd_cos_bit_col_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_8 = {
+  8,  // .txfm_size
+  6,  // .stage_num_col
+  6,  // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_dct_dct_8,            // .shift
+  fwd_stage_range_col_dct_dct_8,  // .stage_range_col
+  fwd_stage_range_row_dct_dct_8,  // .stage_range_row
+  fwd_cos_bit_col_dct_dct_8,      // .cos_bit_col
+  fwd_cos_bit_row_dct_dct_8,      // .cos_bit_row
+  TXFM_TYPE_DCT8,                 // .txfm_type_col
+  TXFM_TYPE_DCT8                  // .txfm_type_row
+};
+
+//  ---------------- config fwd_dct_dct_16 ----------------
+static const int8_t fwd_shift_dct_dct_16[3] = { 2, -2, 0 };
+static const int8_t fwd_stage_range_col_dct_dct_16[8] = { 15, 16, 17, 18,
+                                                          19, 19, 19, 19 };
+static const int8_t fwd_stage_range_row_dct_dct_16[8] = { 17, 18, 19, 20,
+                                                          20, 20, 20, 20 };
+static const int8_t fwd_cos_bit_col_dct_dct_16[8] = { 13, 13, 13, 13,
+                                                      13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_dct_16[8] = { 12, 12, 12, 12,
+                                                      12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_16 = {
+  16,  // .txfm_size
+  8,   // .stage_num_col
+  8,   // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_dct_dct_16,            // .shift
+  fwd_stage_range_col_dct_dct_16,  // .stage_range_col
+  fwd_stage_range_row_dct_dct_16,  // .stage_range_row
+  fwd_cos_bit_col_dct_dct_16,      // .cos_bit_col
+  fwd_cos_bit_row_dct_dct_16,      // .cos_bit_row
+  TXFM_TYPE_DCT16,                 // .txfm_type_col
+  TXFM_TYPE_DCT16                  // .txfm_type_row
+};
+
+//  ---------------- config fwd_dct_dct_32 ----------------
+static const int8_t fwd_shift_dct_dct_32[3] = { 2, -4, 0 };
+static const int8_t fwd_stage_range_col_dct_dct_32[10] = { 15, 16, 17, 18, 19,
+                                                           20, 20, 20, 20, 20 };
+static const int8_t fwd_stage_range_row_dct_dct_32[10] = { 16, 17, 18, 19, 20,
+                                                           20, 20, 20, 20, 20 };
+static const int8_t fwd_cos_bit_col_dct_dct_32[10] = { 12, 12, 12, 12, 12,
+                                                       12, 12, 12, 12, 12 };
+static const int8_t fwd_cos_bit_row_dct_dct_32[10] = { 12, 12, 12, 12, 12,
+                                                       12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_32 = {
+  32,  // .txfm_size
+  10,  // .stage_num_col
+  10,  // .stage_num_row
+  // 1,  // .log_scale
+  fwd_shift_dct_dct_32,            // .shift
+  fwd_stage_range_col_dct_dct_32,  // .stage_range_col
+  fwd_stage_range_row_dct_dct_32,  // .stage_range_row
+  fwd_cos_bit_col_dct_dct_32,      // .cos_bit_col
+  fwd_cos_bit_row_dct_dct_32,      // .cos_bit_row
+  TXFM_TYPE_DCT32,                 // .txfm_type_col
+  TXFM_TYPE_DCT32                  // .txfm_type_row
+};
+
+//  ---------------- config fwd_dct_dct_64 ----------------
+static const int8_t fwd_shift_dct_dct_64[3] = { 2, -2, -2 };
+static const int8_t fwd_stage_range_col_dct_dct_64[12] = {
+  13, 14, 15, 16, 17, 18, 19, 19, 19, 19, 19, 19
+};
+static const int8_t fwd_stage_range_row_dct_dct_64[12] = {
+  17, 18, 19, 20, 21, 22, 22, 22, 22, 22, 22, 22
+};
+static const int8_t fwd_cos_bit_col_dct_dct_64[12] = { 15, 15, 15, 15, 15, 14,
+                                                       13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_dct_64[12] = { 15, 14, 13, 12, 11, 10,
+                                                       10, 10, 10, 10, 10, 10 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_64 = {
+  64,                              // .txfm_size
+  12,                              // .stage_num_col
+  12,                              // .stage_num_row
+  fwd_shift_dct_dct_64,            // .shift
+  fwd_stage_range_col_dct_dct_64,  // .stage_range_col
+  fwd_stage_range_row_dct_dct_64,  // .stage_range_row
+  fwd_cos_bit_col_dct_dct_64,      // .cos_bit_col
+  fwd_cos_bit_row_dct_dct_64,      // .cos_bit_row
+  TXFM_TYPE_DCT64,                 // .txfm_type_col
+  TXFM_TYPE_DCT64                  // .txfm_type_row
+};
+
+//  ---------------- config fwd_dct_adst_4 ----------------
+static const int8_t fwd_shift_dct_adst_4[3] = { 2, 0, 0 };
+static const int8_t fwd_stage_range_col_dct_adst_4[4] = { 15, 16, 17, 17 };
+static const int8_t fwd_stage_range_row_dct_adst_4[6] = {
+  17, 17, 17, 18, 18, 18
+};
+static const int8_t fwd_cos_bit_col_dct_adst_4[4] = { 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_4 = {
+  4,  // .txfm_size
+  4,  // .stage_num_col
+  6,  // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_dct_adst_4,            // .shift
+  fwd_stage_range_col_dct_adst_4,  // .stage_range_col
+  fwd_stage_range_row_dct_adst_4,  // .stage_range_row
+  fwd_cos_bit_col_dct_adst_4,      // .cos_bit_col
+  fwd_cos_bit_row_dct_adst_4,      // .cos_bit_row
+  TXFM_TYPE_DCT4,                  // .txfm_type_col
+  TXFM_TYPE_ADST4                  // .txfm_type_row
+};
+
+//  ---------------- config fwd_dct_adst_8 ----------------
+static const int8_t fwd_shift_dct_adst_8[3] = { 2, -1, 0 };
+static const int8_t fwd_stage_range_col_dct_adst_8[6] = {
+  15, 16, 17, 18, 18, 18
+};
+static const int8_t fwd_stage_range_row_dct_adst_8[8] = { 17, 17, 17, 18,
+                                                          18, 19, 19, 19 };
+static const int8_t fwd_cos_bit_col_dct_adst_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_adst_8[8] = { 13, 13, 13, 13,
+                                                      13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_8 = {
+  8,  // .txfm_size
+  6,  // .stage_num_col
+  8,  // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_dct_adst_8,            // .shift
+  fwd_stage_range_col_dct_adst_8,  // .stage_range_col
+  fwd_stage_range_row_dct_adst_8,  // .stage_range_row
+  fwd_cos_bit_col_dct_adst_8,      // .cos_bit_col
+  fwd_cos_bit_row_dct_adst_8,      // .cos_bit_row
+  TXFM_TYPE_DCT8,                  // .txfm_type_col
+  TXFM_TYPE_ADST8                  // .txfm_type_row
+};
+
+//  ---------------- config fwd_dct_adst_16 ----------------
+static const int8_t fwd_shift_dct_adst_16[3] = { 2, -2, 0 };
+static const int8_t fwd_stage_range_col_dct_adst_16[8] = { 15, 16, 17, 18,
+                                                           19, 19, 19, 19 };
+static const int8_t fwd_stage_range_row_dct_adst_16[10] = {
+  17, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_cos_bit_col_dct_adst_16[8] = { 13, 13, 13, 13,
+                                                       13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_adst_16[10] = { 12, 12, 12, 12, 12,
+                                                        12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_16 = {
+  16,  // .txfm_size
+  8,   // .stage_num_col
+  10,  // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_dct_adst_16,            // .shift
+  fwd_stage_range_col_dct_adst_16,  // .stage_range_col
+  fwd_stage_range_row_dct_adst_16,  // .stage_range_row
+  fwd_cos_bit_col_dct_adst_16,      // .cos_bit_col
+  fwd_cos_bit_row_dct_adst_16,      // .cos_bit_row
+  TXFM_TYPE_DCT16,                  // .txfm_type_col
+  TXFM_TYPE_ADST16                 // .txfm_type_row
+};
+
+//  ---------------- config fwd_dct_adst_32 ----------------
+static const int8_t fwd_shift_dct_adst_32[3] = { 2, -4, 0 };
+static const int8_t fwd_stage_range_col_dct_adst_32[10] = {
+  15, 16, 17, 18, 19, 20, 20, 20, 20, 20
+};
+static const int8_t fwd_stage_range_row_dct_adst_32[12] = {
+  16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_cos_bit_col_dct_adst_32[10] = { 12, 12, 12, 12, 12,
+                                                        12, 12, 12, 12, 12 };
+static const int8_t fwd_cos_bit_row_dct_adst_32[12] = {
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_32 = {
+  32,  // .txfm_size
+  10,  // .stage_num_col
+  12,  // .stage_num_row
+  // 1,  // .log_scale
+  fwd_shift_dct_adst_32,            // .shift
+  fwd_stage_range_col_dct_adst_32,  // .stage_range_col
+  fwd_stage_range_row_dct_adst_32,  // .stage_range_row
+  fwd_cos_bit_col_dct_adst_32,      // .cos_bit_col
+  fwd_cos_bit_row_dct_adst_32,      // .cos_bit_row
+  TXFM_TYPE_DCT32,                  // .txfm_type_col
+  TXFM_TYPE_ADST32                 // .txfm_type_row
+};
+
+//  ---------------- config fwd_adst_adst_4 ----------------
+static const int8_t fwd_shift_adst_adst_4[3] = { 2, 0, 0 };
+static const int8_t fwd_stage_range_col_adst_adst_4[6] = { 15, 15, 16,
+                                                           17, 17, 17 };
+static const int8_t fwd_stage_range_row_adst_adst_4[6] = { 17, 17, 17,
+                                                           18, 18, 18 };
+static const int8_t fwd_cos_bit_col_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_4 = {
+  4,  // .txfm_size
+  6,  // .stage_num_col
+  6,  // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_adst_adst_4,            // .shift
+  fwd_stage_range_col_adst_adst_4,  // .stage_range_col
+  fwd_stage_range_row_adst_adst_4,  // .stage_range_row
+  fwd_cos_bit_col_adst_adst_4,      // .cos_bit_col
+  fwd_cos_bit_row_adst_adst_4,      // .cos_bit_row
+  TXFM_TYPE_ADST4,                  // .txfm_type_col
+  TXFM_TYPE_ADST4                  // .txfm_type_row
+};
+
+//  ---------------- config fwd_adst_adst_8 ----------------
+static const int8_t fwd_shift_adst_adst_8[3] = { 2, -1, 0 };
+static const int8_t fwd_stage_range_col_adst_adst_8[8] = { 15, 15, 16, 17,
+                                                           17, 18, 18, 18 };
+static const int8_t fwd_stage_range_row_adst_adst_8[8] = { 17, 17, 17, 18,
+                                                           18, 19, 19, 19 };
+static const int8_t fwd_cos_bit_col_adst_adst_8[8] = { 13, 13, 13, 13,
+                                                       13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_adst_8[8] = { 13, 13, 13, 13,
+                                                       13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_8 = {
+  8,  // .txfm_size
+  8,  // .stage_num_col
+  8,  // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_adst_adst_8,            // .shift
+  fwd_stage_range_col_adst_adst_8,  // .stage_range_col
+  fwd_stage_range_row_adst_adst_8,  // .stage_range_row
+  fwd_cos_bit_col_adst_adst_8,      // .cos_bit_col
+  fwd_cos_bit_row_adst_adst_8,      // .cos_bit_row
+  TXFM_TYPE_ADST8,                  // .txfm_type_col
+  TXFM_TYPE_ADST8                  // .txfm_type_row
+};
+
+//  ---------------- config fwd_adst_adst_16 ----------------
+static const int8_t fwd_shift_adst_adst_16[3] = { 2, -2, 0 };
+static const int8_t fwd_stage_range_col_adst_adst_16[10] = {
+  15, 15, 16, 17, 17, 18, 18, 19, 19, 19
+};
+static const int8_t fwd_stage_range_row_adst_adst_16[10] = {
+  17, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_cos_bit_col_adst_adst_16[10] = { 13, 13, 13, 13, 13,
+                                                         13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_adst_16[10] = { 12, 12, 12, 12, 12,
+                                                         12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_16 = {
+  16,  // .txfm_size
+  10,  // .stage_num_col
+  10,  // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_adst_adst_16,            // .shift
+  fwd_stage_range_col_adst_adst_16,  // .stage_range_col
+  fwd_stage_range_row_adst_adst_16,  // .stage_range_row
+  fwd_cos_bit_col_adst_adst_16,      // .cos_bit_col
+  fwd_cos_bit_row_adst_adst_16,      // .cos_bit_row
+  TXFM_TYPE_ADST16,                  // .txfm_type_col
+  TXFM_TYPE_ADST16                 // .txfm_type_row
+};
+
+//  ---------------- config fwd_adst_adst_32 ----------------
+static const int8_t fwd_shift_adst_adst_32[3] = { 2, -4, 0 };
+static const int8_t fwd_stage_range_col_adst_adst_32[12] = {
+  15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_stage_range_row_adst_adst_32[12] = {
+  16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_cos_bit_col_adst_adst_32[12] = {
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+static const int8_t fwd_cos_bit_row_adst_adst_32[12] = {
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_32 = {
+  32,  // .txfm_size
+  12,  // .stage_num_col
+  12,  // .stage_num_row
+  // 1,  // .log_scale
+  fwd_shift_adst_adst_32,            // .shift
+  fwd_stage_range_col_adst_adst_32,  // .stage_range_col
+  fwd_stage_range_row_adst_adst_32,  // .stage_range_row
+  fwd_cos_bit_col_adst_adst_32,      // .cos_bit_col
+  fwd_cos_bit_row_adst_adst_32,      // .cos_bit_row
+  TXFM_TYPE_ADST32,                  // .txfm_type_col
+  TXFM_TYPE_ADST32                 // .txfm_type_row
+};
+
+//  ---------------- config fwd_adst_dct_4 ----------------
+static const int8_t fwd_shift_adst_dct_4[3] = { 2, 0, 0 };
+static const int8_t fwd_stage_range_col_adst_dct_4[6] = {
+  15, 15, 16, 17, 17, 17
+};
+static const int8_t fwd_stage_range_row_adst_dct_4[4] = { 17, 18, 18, 18 };
+static const int8_t fwd_cos_bit_col_adst_dct_4[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_dct_4[4] = { 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_4 = {
+  4,  // .txfm_size
+  6,  // .stage_num_col
+  4,  // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_adst_dct_4,            // .shift
+  fwd_stage_range_col_adst_dct_4,  // .stage_range_col
+  fwd_stage_range_row_adst_dct_4,  // .stage_range_row
+  fwd_cos_bit_col_adst_dct_4,      // .cos_bit_col
+  fwd_cos_bit_row_adst_dct_4,      // .cos_bit_row
+  TXFM_TYPE_ADST4,                 // .txfm_type_col
+  TXFM_TYPE_DCT4                   // .txfm_type_row
+};
+
+//  ---------------- config fwd_adst_dct_8 ----------------
+static const int8_t fwd_shift_adst_dct_8[3] = { 2, -1, 0 };
+static const int8_t fwd_stage_range_col_adst_dct_8[8] = { 15, 15, 16, 17,
+                                                          17, 18, 18, 18 };
+static const int8_t fwd_stage_range_row_adst_dct_8[6] = {
+  17, 18, 19, 19, 19, 19
+};
+static const int8_t fwd_cos_bit_col_adst_dct_8[8] = { 13, 13, 13, 13,
+                                                      13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_8 = {
+  8,  // .txfm_size
+  8,  // .stage_num_col
+  6,  // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_adst_dct_8,            // .shift
+  fwd_stage_range_col_adst_dct_8,  // .stage_range_col
+  fwd_stage_range_row_adst_dct_8,  // .stage_range_row
+  fwd_cos_bit_col_adst_dct_8,      // .cos_bit_col
+  fwd_cos_bit_row_adst_dct_8,      // .cos_bit_row
+  TXFM_TYPE_ADST8,                 // .txfm_type_col
+  TXFM_TYPE_DCT8                   // .txfm_type_row
+};
+
+//  ---------------- config fwd_adst_dct_16 ----------------
+static const int8_t fwd_shift_adst_dct_16[3] = { 2, -2, 0 };
+static const int8_t fwd_stage_range_col_adst_dct_16[10] = {
+  15, 15, 16, 17, 17, 18, 18, 19, 19, 19
+};
+static const int8_t fwd_stage_range_row_adst_dct_16[8] = { 17, 18, 19, 20,
+                                                           20, 20, 20, 20 };
+static const int8_t fwd_cos_bit_col_adst_dct_16[10] = { 13, 13, 13, 13, 13,
+                                                        13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_dct_16[8] = { 12, 12, 12, 12,
+                                                       12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_16 = {
+  16,  // .txfm_size
+  10,  // .stage_num_col
+  8,   // .stage_num_row
+  // 0,  // .log_scale
+  fwd_shift_adst_dct_16,            // .shift
+  fwd_stage_range_col_adst_dct_16,  // .stage_range_col
+  fwd_stage_range_row_adst_dct_16,  // .stage_range_row
+  fwd_cos_bit_col_adst_dct_16,      // .cos_bit_col
+  fwd_cos_bit_row_adst_dct_16,      // .cos_bit_row
+  TXFM_TYPE_ADST16,                 // .txfm_type_col
+  TXFM_TYPE_DCT16                  // .txfm_type_row
+};
+
+//  ---------------- config fwd_adst_dct_32 ----------------
+static const int8_t fwd_shift_adst_dct_32[3] = { 2, -4, 0 };
+static const int8_t fwd_stage_range_col_adst_dct_32[12] = {
+  15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_stage_range_row_adst_dct_32[10] = {
+  16, 17, 18, 19, 20, 20, 20, 20, 20, 20
+};
+static const int8_t fwd_cos_bit_col_adst_dct_32[12] = {
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+static const int8_t fwd_cos_bit_row_adst_dct_32[10] = { 12, 12, 12, 12, 12,
+                                                        12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_32 = {
+  32,  // .txfm_size
+  12,  // .stage_num_col
+  10,  // .stage_num_row
+  // 1,  // .log_scale
+  fwd_shift_adst_dct_32,            // .shift
+  fwd_stage_range_col_adst_dct_32,  // .stage_range_col
+  fwd_stage_range_row_adst_dct_32,  // .stage_range_row
+  fwd_cos_bit_col_adst_dct_32,      // .cos_bit_col
+  fwd_cos_bit_row_adst_dct_32,      // .cos_bit_row
+  TXFM_TYPE_ADST32,                 // .txfm_type_col
+  TXFM_TYPE_DCT32                  // .txfm_type_row
+};
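+
+/* Illustrative sketch (an assumption about intended use, not code from this
+ * port): a generic 2-D transform driver would consume a TXFM_2D_CFG roughly
+ * as below, where round_shift_array() and txfm_func_col() are hypothetical
+ * placeholder names and col_in[c]/col_out[c] stand for the c-th column
+ * gathered into a contiguous buffer:
+ *
+ *   const int size = cfg->txfm_size;
+ *   round_shift_array(buf, size * size, -cfg->shift[0]);  // pre-shift
+ *   for (c = 0; c < size; ++c)                            // column pass
+ *     txfm_func_col(col_in[c], col_out[c], cfg->cos_bit_col,
+ *                   cfg->stage_range_col);
+ *   // ...then shift[1], the row pass, and shift[2] analogously.
+ *
+ * In every table above, stage_range[i] + cos_bit[i] <= 32 holds stage by
+ * stage (the 64-point row pass meets the bound exactly: 17+15, 18+14, ...,
+ * 22+10), so each stage's cosine products fit in 32-bit intermediates.
+ */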
+#endif  // VP10_FWD_TXFM2D_CFG_H_
diff --git a/av1/common/vp10_inv_txfm.c b/av1/common/vp10_inv_txfm.c
new file mode 100644
index 0000000..a74de09
--- /dev/null
+++ b/av1/common/vp10_inv_txfm.c
@@ -0,0 +1,2472 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <string.h>
+
+#include "./vp10_rtcd.h"
+#include "av1/common/vp10_inv_txfm.h"
+
+void vp10_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+  /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
+     0.5 shifts per pixel. */
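+  /* (Each 1-D pass below uses 7 additions/subtractions and one shift per
+     group of 4 samples; two passes give (7 + 7) / 4 = 3.5 adds and
+     (1 + 1) / 4 = 0.5 shifts per pixel, matching the figure above.) */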
+  int i;
+  tran_low_t output[16];
+  tran_high_t a1, b1, c1, d1, e1;
+  const tran_low_t *ip = input;
+  tran_low_t *op = output;
+
+  for (i = 0; i < 4; i++) {
+    a1 = ip[0] >> UNIT_QUANT_SHIFT;
+    c1 = ip[1] >> UNIT_QUANT_SHIFT;
+    d1 = ip[2] >> UNIT_QUANT_SHIFT;
+    b1 = ip[3] >> UNIT_QUANT_SHIFT;
+    a1 += c1;
+    d1 -= b1;
+    e1 = (a1 - d1) >> 1;
+    b1 = e1 - b1;
+    c1 = e1 - c1;
+    a1 -= b1;
+    d1 += c1;
+    op[0] = WRAPLOW(a1);
+    op[1] = WRAPLOW(b1);
+    op[2] = WRAPLOW(c1);
+    op[3] = WRAPLOW(d1);
+    ip += 4;
+    op += 4;
+  }
+
+  ip = output;
+  for (i = 0; i < 4; i++) {
+    a1 = ip[4 * 0];
+    c1 = ip[4 * 1];
+    d1 = ip[4 * 2];
+    b1 = ip[4 * 3];
+    a1 += c1;
+    d1 -= b1;
+    e1 = (a1 - d1) >> 1;
+    b1 = e1 - b1;
+    c1 = e1 - c1;
+    a1 -= b1;
+    d1 += c1;
+    dest[stride * 0] = clip_pixel_add(dest[stride * 0], WRAPLOW(a1));
+    dest[stride * 1] = clip_pixel_add(dest[stride * 1], WRAPLOW(b1));
+    dest[stride * 2] = clip_pixel_add(dest[stride * 2], WRAPLOW(c1));
+    dest[stride * 3] = clip_pixel_add(dest[stride * 3], WRAPLOW(d1));
+
+    ip++;
+    dest++;
+  }
+}
+
+void vp10_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest,
+                          int dest_stride) {
+  int i;
+  tran_high_t a1, e1;
+  tran_low_t tmp[4];
+  const tran_low_t *ip = in;
+  tran_low_t *op = tmp;
+
+  a1 = ip[0] >> UNIT_QUANT_SHIFT;
+  e1 = a1 >> 1;
+  a1 -= e1;
+  op[0] = WRAPLOW(a1);
+  op[1] = op[2] = op[3] = WRAPLOW(e1);
+
+  ip = tmp;
+  for (i = 0; i < 4; i++) {
+    e1 = ip[0] >> 1;
+    a1 = ip[0] - e1;
+    dest[dest_stride * 0] = clip_pixel_add(dest[dest_stride * 0], a1);
+    dest[dest_stride * 1] = clip_pixel_add(dest[dest_stride * 1], e1);
+    dest[dest_stride * 2] = clip_pixel_add(dest[dest_stride * 2], e1);
+    dest[dest_stride * 3] = clip_pixel_add(dest[dest_stride * 3], e1);
+    ip++;
+    dest++;
+  }
+}
+
+void vp10_idct4_c(const tran_low_t *input, tran_low_t *output) {
+  tran_low_t step[4];
+  tran_high_t temp1, temp2;
+  // stage 1
+  temp1 = (input[0] + input[2]) * cospi_16_64;
+  temp2 = (input[0] - input[2]) * cospi_16_64;
+  step[0] = WRAPLOW(dct_const_round_shift(temp1));
+  step[1] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
+  temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
+  step[2] = WRAPLOW(dct_const_round_shift(temp1));
+  step[3] = WRAPLOW(dct_const_round_shift(temp2));
+
+  // stage 2
+  output[0] = WRAPLOW(step[0] + step[3]);
+  output[1] = WRAPLOW(step[1] + step[2]);
+  output[2] = WRAPLOW(step[1] - step[2]);
+  output[3] = WRAPLOW(step[0] - step[3]);
+}
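+
+// Fixed-point convention used throughout this file: cospi_N_64 denotes
+// cos(N * pi / 64) scaled by 2^14 (DCT_CONST_BITS in the shared transform
+// headers), and dct_const_round_shift() rounds that factor of 2^14 back out.
+// Each butterfly above therefore computes, e.g.,
+// step[0] = (input[0] + input[2]) * cos(pi / 4) at unit scale.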
+
+void vp10_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+  tran_low_t out[4 * 4];
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[4], temp_out[4];
+
+  // Rows
+  for (i = 0; i < 4; ++i) {
+    vp10_idct4_c(input, outptr);
+    input += 4;
+    outptr += 4;
+  }
+
+  // Columns
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
+    vp10_idct4_c(temp_in, temp_out);
+    for (j = 0; j < 4; ++j) {
+      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+                                            ROUND_POWER_OF_TWO(temp_out[j], 4));
+    }
+  }
+}
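+
+// The final ROUND_POWER_OF_TWO() undoes the scaling accumulated by the
+// matching forward transform: the shift is 4 for 4x4 here, 5 for the 8x8
+// functions, and 6 for the 16x16 and 32x32 functions below.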
+
+void vp10_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
+                          int dest_stride) {
+  int i;
+  tran_high_t a1;
+  tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+  out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
+  a1 = ROUND_POWER_OF_TWO(out, 4);
+
+  for (i = 0; i < 4; i++) {
+    dest[0] = clip_pixel_add(dest[0], a1);
+    dest[1] = clip_pixel_add(dest[1], a1);
+    dest[2] = clip_pixel_add(dest[2], a1);
+    dest[3] = clip_pixel_add(dest[3], a1);
+    dest += dest_stride;
+  }
+}
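+
+// In the DC-only shortcuts (the *_1_add_c functions), each 1-D pass reduces
+// to scaling the DC coefficient by cospi_16_64, so that factor is applied
+// twice up front and the single rounded value is added to every pixel.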
+
+void vp10_idct8_c(const tran_low_t *input, tran_low_t *output) {
+  tran_low_t step1[8], step2[8];
+  tran_high_t temp1, temp2;
+  // stage 1
+  step1[0] = input[0];
+  step1[2] = input[4];
+  step1[1] = input[2];
+  step1[3] = input[6];
+  temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
+  temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
+  step1[4] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[7] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
+  temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
+  step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+
+  // stage 2
+  temp1 = (step1[0] + step1[2]) * cospi_16_64;
+  temp2 = (step1[0] - step1[2]) * cospi_16_64;
+  step2[0] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[1] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
+  temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
+  step2[2] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[3] = WRAPLOW(dct_const_round_shift(temp2));
+  step2[4] = WRAPLOW(step1[4] + step1[5]);
+  step2[5] = WRAPLOW(step1[4] - step1[5]);
+  step2[6] = WRAPLOW(-step1[6] + step1[7]);
+  step2[7] = WRAPLOW(step1[6] + step1[7]);
+
+  // stage 3
+  step1[0] = WRAPLOW(step2[0] + step2[3]);
+  step1[1] = WRAPLOW(step2[1] + step2[2]);
+  step1[2] = WRAPLOW(step2[1] - step2[2]);
+  step1[3] = WRAPLOW(step2[0] - step2[3]);
+  step1[4] = step2[4];
+  temp1 = (step2[6] - step2[5]) * cospi_16_64;
+  temp2 = (step2[5] + step2[6]) * cospi_16_64;
+  step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+  step1[7] = step2[7];
+
+  // stage 4
+  output[0] = WRAPLOW(step1[0] + step1[7]);
+  output[1] = WRAPLOW(step1[1] + step1[6]);
+  output[2] = WRAPLOW(step1[2] + step1[5]);
+  output[3] = WRAPLOW(step1[3] + step1[4]);
+  output[4] = WRAPLOW(step1[3] - step1[4]);
+  output[5] = WRAPLOW(step1[2] - step1[5]);
+  output[6] = WRAPLOW(step1[1] - step1[6]);
+  output[7] = WRAPLOW(step1[0] - step1[7]);
+}
+
+void vp10_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+  tran_low_t out[8 * 8];
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[8], temp_out[8];
+
+  // First transform rows
+  for (i = 0; i < 8; ++i) {
+    vp10_idct8_c(input, outptr);
+    input += 8;
+    outptr += 8;
+  }
+
+  // Then transform columns
+  for (i = 0; i < 8; ++i) {
+    for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
+    vp10_idct8_c(temp_in, temp_out);
+    for (j = 0; j < 8; ++j) {
+      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+                                            ROUND_POWER_OF_TWO(temp_out[j], 5));
+    }
+  }
+}
+
+void vp10_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+  int i, j;
+  tran_high_t a1;
+  tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+  out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
+  a1 = ROUND_POWER_OF_TWO(out, 5);
+  for (j = 0; j < 8; ++j) {
+    for (i = 0; i < 8; ++i) dest[i] = clip_pixel_add(dest[i], a1);
+    dest += stride;
+  }
+}
+
+void vp10_iadst4_c(const tran_low_t *input, tran_low_t *output) {
+  tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+  tran_low_t x0 = input[0];
+  tran_low_t x1 = input[1];
+  tran_low_t x2 = input[2];
+  tran_low_t x3 = input[3];
+
+  if (!(x0 | x1 | x2 | x3)) {
+    output[0] = output[1] = output[2] = output[3] = 0;
+    return;
+  }
+
+  s0 = sinpi_1_9 * x0;
+  s1 = sinpi_2_9 * x0;
+  s2 = sinpi_3_9 * x1;
+  s3 = sinpi_4_9 * x2;
+  s4 = sinpi_1_9 * x2;
+  s5 = sinpi_2_9 * x3;
+  s6 = sinpi_4_9 * x3;
+  s7 = WRAPLOW(x0 - x2 + x3);
+
+  s0 = s0 + s3 + s5;
+  s1 = s1 - s4 - s6;
+  s3 = s2;
+  s2 = sinpi_3_9 * s7;
+
+  // The 1-D transform scaling factor is sqrt(2).
+  // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
+  // + 1b (addition) = 29b; dct_const_round_shift() then drops
+  // DCT_CONST_BITS = 14 of those bits, so the output bit depth is
+  // 29 - 14 = 15b.
+  output[0] = WRAPLOW(dct_const_round_shift(s0 + s3));
+  output[1] = WRAPLOW(dct_const_round_shift(s1 + s3));
+  output[2] = WRAPLOW(dct_const_round_shift(s2));
+  output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3));
+}
+
+void vp10_iadst8_c(const tran_low_t *input, tran_low_t *output) {
+  int s0, s1, s2, s3, s4, s5, s6, s7;
+
+  tran_high_t x0 = input[7];
+  tran_high_t x1 = input[0];
+  tran_high_t x2 = input[5];
+  tran_high_t x3 = input[2];
+  tran_high_t x4 = input[3];
+  tran_high_t x5 = input[4];
+  tran_high_t x6 = input[1];
+  tran_high_t x7 = input[6];
+
+  if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
+    output[0] = output[1] = output[2] = output[3] = output[4] = output[5] =
+        output[6] = output[7] = 0;
+    return;
+  }
+
+  // stage 1
+  s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1);
+  s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1);
+  s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3);
+  s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3);
+  s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5);
+  s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5);
+  s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7);
+  s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7);
+
+  x0 = WRAPLOW(dct_const_round_shift(s0 + s4));
+  x1 = WRAPLOW(dct_const_round_shift(s1 + s5));
+  x2 = WRAPLOW(dct_const_round_shift(s2 + s6));
+  x3 = WRAPLOW(dct_const_round_shift(s3 + s7));
+  x4 = WRAPLOW(dct_const_round_shift(s0 - s4));
+  x5 = WRAPLOW(dct_const_round_shift(s1 - s5));
+  x6 = WRAPLOW(dct_const_round_shift(s2 - s6));
+  x7 = WRAPLOW(dct_const_round_shift(s3 - s7));
+
+  // stage 2
+  s0 = (int)x0;
+  s1 = (int)x1;
+  s2 = (int)x2;
+  s3 = (int)x3;
+  s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5);
+  s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5);
+  s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7);
+  s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7);
+
+  x0 = WRAPLOW(s0 + s2);
+  x1 = WRAPLOW(s1 + s3);
+  x2 = WRAPLOW(s0 - s2);
+  x3 = WRAPLOW(s1 - s3);
+  x4 = WRAPLOW(dct_const_round_shift(s4 + s6));
+  x5 = WRAPLOW(dct_const_round_shift(s5 + s7));
+  x6 = WRAPLOW(dct_const_round_shift(s4 - s6));
+  x7 = WRAPLOW(dct_const_round_shift(s5 - s7));
+
+  // stage 3
+  s2 = (int)(cospi_16_64 * (x2 + x3));
+  s3 = (int)(cospi_16_64 * (x2 - x3));
+  s6 = (int)(cospi_16_64 * (x6 + x7));
+  s7 = (int)(cospi_16_64 * (x6 - x7));
+
+  x2 = WRAPLOW(dct_const_round_shift(s2));
+  x3 = WRAPLOW(dct_const_round_shift(s3));
+  x6 = WRAPLOW(dct_const_round_shift(s6));
+  x7 = WRAPLOW(dct_const_round_shift(s7));
+
+  output[0] = WRAPLOW(x0);
+  output[1] = WRAPLOW(-x4);
+  output[2] = WRAPLOW(x6);
+  output[3] = WRAPLOW(-x2);
+  output[4] = WRAPLOW(x3);
+  output[5] = WRAPLOW(-x7);
+  output[6] = WRAPLOW(x5);
+  output[7] = WRAPLOW(-x1);
+}
+
+void vp10_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+  tran_low_t out[8 * 8] = { 0 };
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[8], temp_out[8];
+
+  // First transform rows: only the first 4 rows have non-zero coefficients,
+  // so the remaining rows of the zero-initialized `out' stay zero.
+  for (i = 0; i < 4; ++i) {
+    vp10_idct8_c(input, outptr);
+    input += 8;
+    outptr += 8;
+  }
+
+  // Then transform columns
+  for (i = 0; i < 8; ++i) {
+    for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
+    vp10_idct8_c(temp_in, temp_out);
+    for (j = 0; j < 8; ++j) {
+      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+                                            ROUND_POWER_OF_TWO(temp_out[j], 5));
+    }
+  }
+}
+
+void vp10_idct16_c(const tran_low_t *input, tran_low_t *output) {
+  tran_low_t step1[16], step2[16];
+  tran_high_t temp1, temp2;
+
+  // stage 1
+  step1[0] = input[0 / 2];
+  step1[1] = input[16 / 2];
+  step1[2] = input[8 / 2];
+  step1[3] = input[24 / 2];
+  step1[4] = input[4 / 2];
+  step1[5] = input[20 / 2];
+  step1[6] = input[12 / 2];
+  step1[7] = input[28 / 2];
+  step1[8] = input[2 / 2];
+  step1[9] = input[18 / 2];
+  step1[10] = input[10 / 2];
+  step1[11] = input[26 / 2];
+  step1[12] = input[6 / 2];
+  step1[13] = input[22 / 2];
+  step1[14] = input[14 / 2];
+  step1[15] = input[30 / 2];
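+  // (The N/2 spelling presumably mirrors the 32-point load order used in
+  // vp10_idct32_c below; the effective indices 0, 8, 4, 12, 2, 10, 6, 14,
+  // 1, 9, 5, 13, 3, 11, 7, 15 are the bit-reversed 16-point input order.)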
+
+  // stage 2
+  step2[0] = step1[0];
+  step2[1] = step1[1];
+  step2[2] = step1[2];
+  step2[3] = step1[3];
+  step2[4] = step1[4];
+  step2[5] = step1[5];
+  step2[6] = step1[6];
+  step2[7] = step1[7];
+
+  temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+  temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+  step2[8] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[15] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+  temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+  step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[14] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+  temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+  step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+  temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+  step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[12] = WRAPLOW(dct_const_round_shift(temp2));
+
+  // stage 3
+  step1[0] = step2[0];
+  step1[1] = step2[1];
+  step1[2] = step2[2];
+  step1[3] = step2[3];
+
+  temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+  temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+  step1[4] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[7] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+  temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+  step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+
+  step1[8] = WRAPLOW(step2[8] + step2[9]);
+  step1[9] = WRAPLOW(step2[8] - step2[9]);
+  step1[10] = WRAPLOW(-step2[10] + step2[11]);
+  step1[11] = WRAPLOW(step2[10] + step2[11]);
+  step1[12] = WRAPLOW(step2[12] + step2[13]);
+  step1[13] = WRAPLOW(step2[12] - step2[13]);
+  step1[14] = WRAPLOW(-step2[14] + step2[15]);
+  step1[15] = WRAPLOW(step2[14] + step2[15]);
+
+  // stage 4
+  temp1 = (step1[0] + step1[1]) * cospi_16_64;
+  temp2 = (step1[0] - step1[1]) * cospi_16_64;
+  step2[0] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[1] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+  temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+  step2[2] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[3] = WRAPLOW(dct_const_round_shift(temp2));
+  step2[4] = WRAPLOW(step1[4] + step1[5]);
+  step2[5] = WRAPLOW(step1[4] - step1[5]);
+  step2[6] = WRAPLOW(-step1[6] + step1[7]);
+  step2[7] = WRAPLOW(step1[6] + step1[7]);
+
+  step2[8] = step1[8];
+  step2[15] = step1[15];
+  temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+  temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+  step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[14] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+  temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+  step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+  step2[11] = step1[11];
+  step2[12] = step1[12];
+
+  // stage 5
+  step1[0] = WRAPLOW(step2[0] + step2[3]);
+  step1[1] = WRAPLOW(step2[1] + step2[2]);
+  step1[2] = WRAPLOW(step2[1] - step2[2]);
+  step1[3] = WRAPLOW(step2[0] - step2[3]);
+  step1[4] = step2[4];
+  temp1 = (step2[6] - step2[5]) * cospi_16_64;
+  temp2 = (step2[5] + step2[6]) * cospi_16_64;
+  step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+  step1[7] = step2[7];
+
+  step1[8] = WRAPLOW(step2[8] + step2[11]);
+  step1[9] = WRAPLOW(step2[9] + step2[10]);
+  step1[10] = WRAPLOW(step2[9] - step2[10]);
+  step1[11] = WRAPLOW(step2[8] - step2[11]);
+  step1[12] = WRAPLOW(-step2[12] + step2[15]);
+  step1[13] = WRAPLOW(-step2[13] + step2[14]);
+  step1[14] = WRAPLOW(step2[13] + step2[14]);
+  step1[15] = WRAPLOW(step2[12] + step2[15]);
+
+  // stage 6
+  step2[0] = WRAPLOW(step1[0] + step1[7]);
+  step2[1] = WRAPLOW(step1[1] + step1[6]);
+  step2[2] = WRAPLOW(step1[2] + step1[5]);
+  step2[3] = WRAPLOW(step1[3] + step1[4]);
+  step2[4] = WRAPLOW(step1[3] - step1[4]);
+  step2[5] = WRAPLOW(step1[2] - step1[5]);
+  step2[6] = WRAPLOW(step1[1] - step1[6]);
+  step2[7] = WRAPLOW(step1[0] - step1[7]);
+  step2[8] = step1[8];
+  step2[9] = step1[9];
+  temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+  temp2 = (step1[10] + step1[13]) * cospi_16_64;
+  step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+  temp2 = (step1[11] + step1[12]) * cospi_16_64;
+  step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[12] = WRAPLOW(dct_const_round_shift(temp2));
+  step2[14] = step1[14];
+  step2[15] = step1[15];
+
+  // stage 7
+  output[0] = WRAPLOW(step2[0] + step2[15]);
+  output[1] = WRAPLOW(step2[1] + step2[14]);
+  output[2] = WRAPLOW(step2[2] + step2[13]);
+  output[3] = WRAPLOW(step2[3] + step2[12]);
+  output[4] = WRAPLOW(step2[4] + step2[11]);
+  output[5] = WRAPLOW(step2[5] + step2[10]);
+  output[6] = WRAPLOW(step2[6] + step2[9]);
+  output[7] = WRAPLOW(step2[7] + step2[8]);
+  output[8] = WRAPLOW(step2[7] - step2[8]);
+  output[9] = WRAPLOW(step2[6] - step2[9]);
+  output[10] = WRAPLOW(step2[5] - step2[10]);
+  output[11] = WRAPLOW(step2[4] - step2[11]);
+  output[12] = WRAPLOW(step2[3] - step2[12]);
+  output[13] = WRAPLOW(step2[2] - step2[13]);
+  output[14] = WRAPLOW(step2[1] - step2[14]);
+  output[15] = WRAPLOW(step2[0] - step2[15]);
+}
+
+void vp10_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
+                              int stride) {
+  tran_low_t out[16 * 16];
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[16], temp_out[16];
+
+  // First transform rows
+  for (i = 0; i < 16; ++i) {
+    vp10_idct16_c(input, outptr);
+    input += 16;
+    outptr += 16;
+  }
+
+  // Then transform columns
+  for (i = 0; i < 16; ++i) {
+    for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
+    vp10_idct16_c(temp_in, temp_out);
+    for (j = 0; j < 16; ++j) {
+      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+                                            ROUND_POWER_OF_TWO(temp_out[j], 6));
+    }
+  }
+}
+
+void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output) {
+  tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
+  tran_high_t s9, s10, s11, s12, s13, s14, s15;
+
+  tran_high_t x0 = input[15];
+  tran_high_t x1 = input[0];
+  tran_high_t x2 = input[13];
+  tran_high_t x3 = input[2];
+  tran_high_t x4 = input[11];
+  tran_high_t x5 = input[4];
+  tran_high_t x6 = input[9];
+  tran_high_t x7 = input[6];
+  tran_high_t x8 = input[7];
+  tran_high_t x9 = input[8];
+  tran_high_t x10 = input[5];
+  tran_high_t x11 = input[10];
+  tran_high_t x12 = input[3];
+  tran_high_t x13 = input[12];
+  tran_high_t x14 = input[1];
+  tran_high_t x15 = input[14];
+
+  if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 |
+        x13 | x14 | x15)) {
+    output[0] = output[1] = output[2] = output[3] = output[4] = output[5] =
+        output[6] = output[7] = output[8] = output[9] = output[10] =
+            output[11] = output[12] = output[13] = output[14] = output[15] = 0;
+    return;
+  }
+
+  // stage 1
+  s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
+  s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
+  s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
+  s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
+  s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
+  s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
+  s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
+  s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
+  s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
+  s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
+  s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
+  s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
+  s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
+  s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
+  s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
+  s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
+
+  x0 = WRAPLOW(dct_const_round_shift(s0 + s8));
+  x1 = WRAPLOW(dct_const_round_shift(s1 + s9));
+  x2 = WRAPLOW(dct_const_round_shift(s2 + s10));
+  x3 = WRAPLOW(dct_const_round_shift(s3 + s11));
+  x4 = WRAPLOW(dct_const_round_shift(s4 + s12));
+  x5 = WRAPLOW(dct_const_round_shift(s5 + s13));
+  x6 = WRAPLOW(dct_const_round_shift(s6 + s14));
+  x7 = WRAPLOW(dct_const_round_shift(s7 + s15));
+  x8 = WRAPLOW(dct_const_round_shift(s0 - s8));
+  x9 = WRAPLOW(dct_const_round_shift(s1 - s9));
+  x10 = WRAPLOW(dct_const_round_shift(s2 - s10));
+  x11 = WRAPLOW(dct_const_round_shift(s3 - s11));
+  x12 = WRAPLOW(dct_const_round_shift(s4 - s12));
+  x13 = WRAPLOW(dct_const_round_shift(s5 - s13));
+  x14 = WRAPLOW(dct_const_round_shift(s6 - s14));
+  x15 = WRAPLOW(dct_const_round_shift(s7 - s15));
+
+  // stage 2
+  s0 = x0;
+  s1 = x1;
+  s2 = x2;
+  s3 = x3;
+  s4 = x4;
+  s5 = x5;
+  s6 = x6;
+  s7 = x7;
+  s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
+  s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
+  s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
+  s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
+  s12 = -x12 * cospi_28_64 + x13 * cospi_4_64;
+  s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
+  s14 = -x14 * cospi_12_64 + x15 * cospi_20_64;
+  s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
+
+  x0 = WRAPLOW(s0 + s4);
+  x1 = WRAPLOW(s1 + s5);
+  x2 = WRAPLOW(s2 + s6);
+  x3 = WRAPLOW(s3 + s7);
+  x4 = WRAPLOW(s0 - s4);
+  x5 = WRAPLOW(s1 - s5);
+  x6 = WRAPLOW(s2 - s6);
+  x7 = WRAPLOW(s3 - s7);
+  x8 = WRAPLOW(dct_const_round_shift(s8 + s12));
+  x9 = WRAPLOW(dct_const_round_shift(s9 + s13));
+  x10 = WRAPLOW(dct_const_round_shift(s10 + s14));
+  x11 = WRAPLOW(dct_const_round_shift(s11 + s15));
+  x12 = WRAPLOW(dct_const_round_shift(s8 - s12));
+  x13 = WRAPLOW(dct_const_round_shift(s9 - s13));
+  x14 = WRAPLOW(dct_const_round_shift(s10 - s14));
+  x15 = WRAPLOW(dct_const_round_shift(s11 - s15));
+
+  // stage 3
+  s0 = x0;
+  s1 = x1;
+  s2 = x2;
+  s3 = x3;
+  s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
+  s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
+  s6 = -x6 * cospi_24_64 + x7 * cospi_8_64;
+  s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
+  s8 = x8;
+  s9 = x9;
+  s10 = x10;
+  s11 = x11;
+  s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
+  s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
+  s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
+  s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
+
+  x0 = WRAPLOW(s0 + s2);
+  x1 = WRAPLOW(s1 + s3);
+  x2 = WRAPLOW(s0 - s2);
+  x3 = WRAPLOW(s1 - s3);
+  x4 = WRAPLOW(dct_const_round_shift(s4 + s6));
+  x5 = WRAPLOW(dct_const_round_shift(s5 + s7));
+  x6 = WRAPLOW(dct_const_round_shift(s4 - s6));
+  x7 = WRAPLOW(dct_const_round_shift(s5 - s7));
+  x8 = WRAPLOW(s8 + s10);
+  x9 = WRAPLOW(s9 + s11);
+  x10 = WRAPLOW(s8 - s10);
+  x11 = WRAPLOW(s9 - s11);
+  x12 = WRAPLOW(dct_const_round_shift(s12 + s14));
+  x13 = WRAPLOW(dct_const_round_shift(s13 + s15));
+  x14 = WRAPLOW(dct_const_round_shift(s12 - s14));
+  x15 = WRAPLOW(dct_const_round_shift(s13 - s15));
+
+  // stage 4
+  s2 = (-cospi_16_64) * (x2 + x3);
+  s3 = cospi_16_64 * (x2 - x3);
+  s6 = cospi_16_64 * (x6 + x7);
+  s7 = cospi_16_64 * (-x6 + x7);
+  s10 = cospi_16_64 * (x10 + x11);
+  s11 = cospi_16_64 * (-x10 + x11);
+  s14 = (-cospi_16_64) * (x14 + x15);
+  s15 = cospi_16_64 * (x14 - x15);
+
+  x2 = WRAPLOW(dct_const_round_shift(s2));
+  x3 = WRAPLOW(dct_const_round_shift(s3));
+  x6 = WRAPLOW(dct_const_round_shift(s6));
+  x7 = WRAPLOW(dct_const_round_shift(s7));
+  x10 = WRAPLOW(dct_const_round_shift(s10));
+  x11 = WRAPLOW(dct_const_round_shift(s11));
+  x14 = WRAPLOW(dct_const_round_shift(s14));
+  x15 = WRAPLOW(dct_const_round_shift(s15));
+
+  output[0] = WRAPLOW(x0);
+  output[1] = WRAPLOW(-x8);
+  output[2] = WRAPLOW(x12);
+  output[3] = WRAPLOW(-x4);
+  output[4] = WRAPLOW(x6);
+  output[5] = WRAPLOW(x14);
+  output[6] = WRAPLOW(x10);
+  output[7] = WRAPLOW(x2);
+  output[8] = WRAPLOW(x3);
+  output[9] = WRAPLOW(x11);
+  output[10] = WRAPLOW(x15);
+  output[11] = WRAPLOW(x7);
+  output[12] = WRAPLOW(x5);
+  output[13] = WRAPLOW(-x13);
+  output[14] = WRAPLOW(x9);
+  output[15] = WRAPLOW(-x1);
+}
+
+void vp10_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest,
+                             int stride) {
+  tran_low_t out[16 * 16] = { 0 };
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[16], temp_out[16];
+
+  // First transform rows. Since all non-zero DCT coefficients are in the
+  // upper-left 4x4 area, we only need to calculate the first 4 rows here.
+  for (i = 0; i < 4; ++i) {
+    vp10_idct16_c(input, outptr);
+    input += 16;
+    outptr += 16;
+  }
+
+  // Then transform columns
+  for (i = 0; i < 16; ++i) {
+    for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
+    vp10_idct16_c(temp_in, temp_out);
+    for (j = 0; j < 16; ++j) {
+      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+                                            ROUND_POWER_OF_TWO(temp_out[j], 6));
+    }
+  }
+}
+
+void vp10_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest,
+                            int stride) {
+  int i, j;
+  tran_high_t a1;
+  tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+  out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
+  a1 = ROUND_POWER_OF_TWO(out, 6);
+  for (j = 0; j < 16; ++j) {
+    for (i = 0; i < 16; ++i) dest[i] = clip_pixel_add(dest[i], a1);
+    dest += stride;
+  }
+}
+
+void vp10_idct32_c(const tran_low_t *input, tran_low_t *output) {
+  tran_low_t step1[32], step2[32];
+  tran_high_t temp1, temp2;
+
+  // stage 1
+  step1[0] = input[0];
+  step1[1] = input[16];
+  step1[2] = input[8];
+  step1[3] = input[24];
+  step1[4] = input[4];
+  step1[5] = input[20];
+  step1[6] = input[12];
+  step1[7] = input[28];
+  step1[8] = input[2];
+  step1[9] = input[18];
+  step1[10] = input[10];
+  step1[11] = input[26];
+  step1[12] = input[6];
+  step1[13] = input[22];
+  step1[14] = input[14];
+  step1[15] = input[30];
+
+  temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
+  temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
+  step1[16] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[31] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
+  temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
+  step1[17] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[30] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
+  temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
+  step1[18] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[29] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
+  temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
+  step1[19] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[28] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
+  temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
+  step1[20] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[27] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
+  temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
+  step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[26] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
+  temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
+  step1[22] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[25] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
+  temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
+  step1[23] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[24] = WRAPLOW(dct_const_round_shift(temp2));
+
+  // stage 2
+  step2[0] = step1[0];
+  step2[1] = step1[1];
+  step2[2] = step1[2];
+  step2[3] = step1[3];
+  step2[4] = step1[4];
+  step2[5] = step1[5];
+  step2[6] = step1[6];
+  step2[7] = step1[7];
+
+  temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+  temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+  step2[8] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[15] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+  temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+  step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[14] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+  temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+  step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+
+  temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+  temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+  step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[12] = WRAPLOW(dct_const_round_shift(temp2));
+
+  step2[16] = WRAPLOW(step1[16] + step1[17]);
+  step2[17] = WRAPLOW(step1[16] - step1[17]);
+  step2[18] = WRAPLOW(-step1[18] + step1[19]);
+  step2[19] = WRAPLOW(step1[18] + step1[19]);
+  step2[20] = WRAPLOW(step1[20] + step1[21]);
+  step2[21] = WRAPLOW(step1[20] - step1[21]);
+  step2[22] = WRAPLOW(-step1[22] + step1[23]);
+  step2[23] = WRAPLOW(step1[22] + step1[23]);
+  step2[24] = WRAPLOW(step1[24] + step1[25]);
+  step2[25] = WRAPLOW(step1[24] - step1[25]);
+  step2[26] = WRAPLOW(-step1[26] + step1[27]);
+  step2[27] = WRAPLOW(step1[26] + step1[27]);
+  step2[28] = WRAPLOW(step1[28] + step1[29]);
+  step2[29] = WRAPLOW(step1[28] - step1[29]);
+  step2[30] = WRAPLOW(-step1[30] + step1[31]);
+  step2[31] = WRAPLOW(step1[30] + step1[31]);
+
+  // stage 3
+  step1[0] = step2[0];
+  step1[1] = step2[1];
+  step1[2] = step2[2];
+  step1[3] = step2[3];
+
+  temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+  temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+  step1[4] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[7] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+  temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+  step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+
+  step1[8] = WRAPLOW(step2[8] + step2[9]);
+  step1[9] = WRAPLOW(step2[8] - step2[9]);
+  step1[10] = WRAPLOW(-step2[10] + step2[11]);
+  step1[11] = WRAPLOW(step2[10] + step2[11]);
+  step1[12] = WRAPLOW(step2[12] + step2[13]);
+  step1[13] = WRAPLOW(step2[12] - step2[13]);
+  step1[14] = WRAPLOW(-step2[14] + step2[15]);
+  step1[15] = WRAPLOW(step2[14] + step2[15]);
+
+  step1[16] = step2[16];
+  step1[31] = step2[31];
+  temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
+  temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
+  step1[17] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[30] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
+  temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
+  step1[18] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[29] = WRAPLOW(dct_const_round_shift(temp2));
+  step1[19] = step2[19];
+  step1[20] = step2[20];
+  temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
+  temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
+  step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[26] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
+  temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
+  step1[22] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[25] = WRAPLOW(dct_const_round_shift(temp2));
+  step1[23] = step2[23];
+  step1[24] = step2[24];
+  step1[27] = step2[27];
+  step1[28] = step2[28];
+
+  // stage 4
+  temp1 = (step1[0] + step1[1]) * cospi_16_64;
+  temp2 = (step1[0] - step1[1]) * cospi_16_64;
+  step2[0] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[1] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+  temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+  step2[2] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[3] = WRAPLOW(dct_const_round_shift(temp2));
+  step2[4] = WRAPLOW(step1[4] + step1[5]);
+  step2[5] = WRAPLOW(step1[4] - step1[5]);
+  step2[6] = WRAPLOW(-step1[6] + step1[7]);
+  step2[7] = WRAPLOW(step1[6] + step1[7]);
+
+  step2[8] = step1[8];
+  step2[15] = step1[15];
+  temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+  temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+  step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[14] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+  temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+  step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+  step2[11] = step1[11];
+  step2[12] = step1[12];
+
+  step2[16] = WRAPLOW(step1[16] + step1[19]);
+  step2[17] = WRAPLOW(step1[17] + step1[18]);
+  step2[18] = WRAPLOW(step1[17] - step1[18]);
+  step2[19] = WRAPLOW(step1[16] - step1[19]);
+  step2[20] = WRAPLOW(-step1[20] + step1[23]);
+  step2[21] = WRAPLOW(-step1[21] + step1[22]);
+  step2[22] = WRAPLOW(step1[21] + step1[22]);
+  step2[23] = WRAPLOW(step1[20] + step1[23]);
+
+  step2[24] = WRAPLOW(step1[24] + step1[27]);
+  step2[25] = WRAPLOW(step1[25] + step1[26]);
+  step2[26] = WRAPLOW(step1[25] - step1[26]);
+  step2[27] = WRAPLOW(step1[24] - step1[27]);
+  step2[28] = WRAPLOW(-step1[28] + step1[31]);
+  step2[29] = WRAPLOW(-step1[29] + step1[30]);
+  step2[30] = WRAPLOW(step1[29] + step1[30]);
+  step2[31] = WRAPLOW(step1[28] + step1[31]);
+
+  // stage 5
+  step1[0] = WRAPLOW(step2[0] + step2[3]);
+  step1[1] = WRAPLOW(step2[1] + step2[2]);
+  step1[2] = WRAPLOW(step2[1] - step2[2]);
+  step1[3] = WRAPLOW(step2[0] - step2[3]);
+  step1[4] = step2[4];
+  temp1 = (step2[6] - step2[5]) * cospi_16_64;
+  temp2 = (step2[5] + step2[6]) * cospi_16_64;
+  step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+  step1[7] = step2[7];
+
+  step1[8] = WRAPLOW(step2[8] + step2[11]);
+  step1[9] = WRAPLOW(step2[9] + step2[10]);
+  step1[10] = WRAPLOW(step2[9] - step2[10]);
+  step1[11] = WRAPLOW(step2[8] - step2[11]);
+  step1[12] = WRAPLOW(-step2[12] + step2[15]);
+  step1[13] = WRAPLOW(-step2[13] + step2[14]);
+  step1[14] = WRAPLOW(step2[13] + step2[14]);
+  step1[15] = WRAPLOW(step2[12] + step2[15]);
+
+  step1[16] = step2[16];
+  step1[17] = step2[17];
+  temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
+  temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
+  step1[18] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[29] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
+  temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
+  step1[19] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[28] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
+  temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
+  step1[20] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[27] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
+  temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
+  step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[26] = WRAPLOW(dct_const_round_shift(temp2));
+  step1[22] = step2[22];
+  step1[23] = step2[23];
+  step1[24] = step2[24];
+  step1[25] = step2[25];
+  step1[30] = step2[30];
+  step1[31] = step2[31];
+
+  // stage 6
+  step2[0] = WRAPLOW(step1[0] + step1[7]);
+  step2[1] = WRAPLOW(step1[1] + step1[6]);
+  step2[2] = WRAPLOW(step1[2] + step1[5]);
+  step2[3] = WRAPLOW(step1[3] + step1[4]);
+  step2[4] = WRAPLOW(step1[3] - step1[4]);
+  step2[5] = WRAPLOW(step1[2] - step1[5]);
+  step2[6] = WRAPLOW(step1[1] - step1[6]);
+  step2[7] = WRAPLOW(step1[0] - step1[7]);
+  step2[8] = step1[8];
+  step2[9] = step1[9];
+  temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+  temp2 = (step1[10] + step1[13]) * cospi_16_64;
+  step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+  temp2 = (step1[11] + step1[12]) * cospi_16_64;
+  step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+  step2[12] = WRAPLOW(dct_const_round_shift(temp2));
+  step2[14] = step1[14];
+  step2[15] = step1[15];
+
+  step2[16] = WRAPLOW(step1[16] + step1[23]);
+  step2[17] = WRAPLOW(step1[17] + step1[22]);
+  step2[18] = WRAPLOW(step1[18] + step1[21]);
+  step2[19] = WRAPLOW(step1[19] + step1[20]);
+  step2[20] = WRAPLOW(step1[19] - step1[20]);
+  step2[21] = WRAPLOW(step1[18] - step1[21]);
+  step2[22] = WRAPLOW(step1[17] - step1[22]);
+  step2[23] = WRAPLOW(step1[16] - step1[23]);
+
+  step2[24] = WRAPLOW(-step1[24] + step1[31]);
+  step2[25] = WRAPLOW(-step1[25] + step1[30]);
+  step2[26] = WRAPLOW(-step1[26] + step1[29]);
+  step2[27] = WRAPLOW(-step1[27] + step1[28]);
+  step2[28] = WRAPLOW(step1[27] + step1[28]);
+  step2[29] = WRAPLOW(step1[26] + step1[29]);
+  step2[30] = WRAPLOW(step1[25] + step1[30]);
+  step2[31] = WRAPLOW(step1[24] + step1[31]);
+
+  // stage 7
+  step1[0] = WRAPLOW(step2[0] + step2[15]);
+  step1[1] = WRAPLOW(step2[1] + step2[14]);
+  step1[2] = WRAPLOW(step2[2] + step2[13]);
+  step1[3] = WRAPLOW(step2[3] + step2[12]);
+  step1[4] = WRAPLOW(step2[4] + step2[11]);
+  step1[5] = WRAPLOW(step2[5] + step2[10]);
+  step1[6] = WRAPLOW(step2[6] + step2[9]);
+  step1[7] = WRAPLOW(step2[7] + step2[8]);
+  step1[8] = WRAPLOW(step2[7] - step2[8]);
+  step1[9] = WRAPLOW(step2[6] - step2[9]);
+  step1[10] = WRAPLOW(step2[5] - step2[10]);
+  step1[11] = WRAPLOW(step2[4] - step2[11]);
+  step1[12] = WRAPLOW(step2[3] - step2[12]);
+  step1[13] = WRAPLOW(step2[2] - step2[13]);
+  step1[14] = WRAPLOW(step2[1] - step2[14]);
+  step1[15] = WRAPLOW(step2[0] - step2[15]);
+
+  step1[16] = step2[16];
+  step1[17] = step2[17];
+  step1[18] = step2[18];
+  step1[19] = step2[19];
+  temp1 = (-step2[20] + step2[27]) * cospi_16_64;
+  temp2 = (step2[20] + step2[27]) * cospi_16_64;
+  step1[20] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[27] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = (-step2[21] + step2[26]) * cospi_16_64;
+  temp2 = (step2[21] + step2[26]) * cospi_16_64;
+  step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[26] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = (-step2[22] + step2[25]) * cospi_16_64;
+  temp2 = (step2[22] + step2[25]) * cospi_16_64;
+  step1[22] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[25] = WRAPLOW(dct_const_round_shift(temp2));
+  temp1 = (-step2[23] + step2[24]) * cospi_16_64;
+  temp2 = (step2[23] + step2[24]) * cospi_16_64;
+  step1[23] = WRAPLOW(dct_const_round_shift(temp1));
+  step1[24] = WRAPLOW(dct_const_round_shift(temp2));
+  step1[28] = step2[28];
+  step1[29] = step2[29];
+  step1[30] = step2[30];
+  step1[31] = step2[31];
+
+  // final stage
+  output[0] = WRAPLOW(step1[0] + step1[31]);
+  output[1] = WRAPLOW(step1[1] + step1[30]);
+  output[2] = WRAPLOW(step1[2] + step1[29]);
+  output[3] = WRAPLOW(step1[3] + step1[28]);
+  output[4] = WRAPLOW(step1[4] + step1[27]);
+  output[5] = WRAPLOW(step1[5] + step1[26]);
+  output[6] = WRAPLOW(step1[6] + step1[25]);
+  output[7] = WRAPLOW(step1[7] + step1[24]);
+  output[8] = WRAPLOW(step1[8] + step1[23]);
+  output[9] = WRAPLOW(step1[9] + step1[22]);
+  output[10] = WRAPLOW(step1[10] + step1[21]);
+  output[11] = WRAPLOW(step1[11] + step1[20]);
+  output[12] = WRAPLOW(step1[12] + step1[19]);
+  output[13] = WRAPLOW(step1[13] + step1[18]);
+  output[14] = WRAPLOW(step1[14] + step1[17]);
+  output[15] = WRAPLOW(step1[15] + step1[16]);
+  output[16] = WRAPLOW(step1[15] - step1[16]);
+  output[17] = WRAPLOW(step1[14] - step1[17]);
+  output[18] = WRAPLOW(step1[13] - step1[18]);
+  output[19] = WRAPLOW(step1[12] - step1[19]);
+  output[20] = WRAPLOW(step1[11] - step1[20]);
+  output[21] = WRAPLOW(step1[10] - step1[21]);
+  output[22] = WRAPLOW(step1[9] - step1[22]);
+  output[23] = WRAPLOW(step1[8] - step1[23]);
+  output[24] = WRAPLOW(step1[7] - step1[24]);
+  output[25] = WRAPLOW(step1[6] - step1[25]);
+  output[26] = WRAPLOW(step1[5] - step1[26]);
+  output[27] = WRAPLOW(step1[4] - step1[27]);
+  output[28] = WRAPLOW(step1[3] - step1[28]);
+  output[29] = WRAPLOW(step1[2] - step1[29]);
+  output[30] = WRAPLOW(step1[1] - step1[30]);
+  output[31] = WRAPLOW(step1[0] - step1[31]);
+}
+
+void vp10_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
+                               int stride) {
+  tran_low_t out[32 * 32];
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[32], temp_out[32];
+
+  // Rows
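+  // Each row is first OR-reduced pairwise (16 -> 8 -> 4 -> 2 terms) to test
+  // whether all 32 coefficients are zero; all-zero rows skip the 1-D
+  // transform and are memset to zero instead.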
+  for (i = 0; i < 32; ++i) {
+    int16_t zero_coeff[16];
+    for (j = 0; j < 16; ++j) zero_coeff[j] = input[2 * j] | input[2 * j + 1];
+    for (j = 0; j < 8; ++j)
+      zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
+    for (j = 0; j < 4; ++j)
+      zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
+    for (j = 0; j < 2; ++j)
+      zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
+
+    if (zero_coeff[0] | zero_coeff[1])
+      vp10_idct32_c(input, outptr);
+    else
+      memset(outptr, 0, sizeof(tran_low_t) * 32);
+    input += 32;
+    outptr += 32;
+  }
+
+  // Columns
+  for (i = 0; i < 32; ++i) {
+    for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i];
+    vp10_idct32_c(temp_in, temp_out);
+    for (j = 0; j < 32; ++j) {
+      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+                                            ROUND_POWER_OF_TWO(temp_out[j], 6));
+    }
+  }
+}
+
+void vp10_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
+                             int stride) {
+  tran_low_t out[32 * 32] = { 0 };
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[32], temp_out[32];
+
+  // Rows
+  // Only the upper-left 8x8 block has non-zero coefficients.
+  for (i = 0; i < 8; ++i) {
+    vp10_idct32_c(input, outptr);
+    input += 32;
+    outptr += 32;
+  }
+
+  // Columns
+  for (i = 0; i < 32; ++i) {
+    for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i];
+    vp10_idct32_c(temp_in, temp_out);
+    for (j = 0; j < 32; ++j) {
+      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+                                            ROUND_POWER_OF_TWO(temp_out[j], 6));
+    }
+  }
+}
+
+void vp10_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest,
+                            int stride) {
+  int i, j;
+  tran_high_t a1;
+
+  tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+  out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
+  a1 = ROUND_POWER_OF_TWO(out, 6);
+
+  for (j = 0; j < 32; ++j) {
+    for (i = 0; i < 32; ++i) dest[i] = clip_pixel_add(dest[i], a1);
+    dest += stride;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
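+// The high-bitdepth variants below mirror the 8-bit functions above: pixels
+// are stored as uint16_t (recovered via CONVERT_TO_SHORTPTR) and wrapping /
+// clamping goes through the bd-aware HIGHBD_WRAPLOW() and
+// highbd_clip_pixel_add() helpers.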
+void vp10_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
+                                  int stride, int bd) {
+  /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
+     0.5 shifts per pixel. */
+  int i;
+  tran_low_t output[16];
+  tran_high_t a1, b1, c1, d1, e1;
+  const tran_low_t *ip = input;
+  tran_low_t *op = output;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  for (i = 0; i < 4; i++) {
+    a1 = ip[0] >> UNIT_QUANT_SHIFT;
+    c1 = ip[1] >> UNIT_QUANT_SHIFT;
+    d1 = ip[2] >> UNIT_QUANT_SHIFT;
+    b1 = ip[3] >> UNIT_QUANT_SHIFT;
+    a1 += c1;
+    d1 -= b1;
+    e1 = (a1 - d1) >> 1;
+    b1 = e1 - b1;
+    c1 = e1 - c1;
+    a1 -= b1;
+    d1 += c1;
+    op[0] = HIGHBD_WRAPLOW(a1, bd);
+    op[1] = HIGHBD_WRAPLOW(b1, bd);
+    op[2] = HIGHBD_WRAPLOW(c1, bd);
+    op[3] = HIGHBD_WRAPLOW(d1, bd);
+    ip += 4;
+    op += 4;
+  }
+
+  ip = output;
+  for (i = 0; i < 4; i++) {
+    a1 = ip[4 * 0];
+    c1 = ip[4 * 1];
+    d1 = ip[4 * 2];
+    b1 = ip[4 * 3];
+    a1 += c1;
+    d1 -= b1;
+    e1 = (a1 - d1) >> 1;
+    b1 = e1 - b1;
+    c1 = e1 - c1;
+    a1 -= b1;
+    d1 += c1;
+    dest[stride * 0] =
+        highbd_clip_pixel_add(dest[stride * 0], HIGHBD_WRAPLOW(a1, bd), bd);
+    dest[stride * 1] =
+        highbd_clip_pixel_add(dest[stride * 1], HIGHBD_WRAPLOW(b1, bd), bd);
+    dest[stride * 2] =
+        highbd_clip_pixel_add(dest[stride * 2], HIGHBD_WRAPLOW(c1, bd), bd);
+    dest[stride * 3] =
+        highbd_clip_pixel_add(dest[stride * 3], HIGHBD_WRAPLOW(d1, bd), bd);
+    ip++;
+    dest++;
+  }
+}
+
+void vp10_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
+                                 int dest_stride, int bd) {
+  int i;
+  tran_high_t a1, e1;
+  tran_low_t tmp[4];
+  const tran_low_t *ip = in;
+  tran_low_t *op = tmp;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  (void)bd;
+
+  a1 = ip[0] >> UNIT_QUANT_SHIFT;
+  e1 = a1 >> 1;
+  a1 -= e1;
+  op[0] = HIGHBD_WRAPLOW(a1, bd);
+  op[1] = op[2] = op[3] = HIGHBD_WRAPLOW(e1, bd);
+
+  ip = tmp;
+  for (i = 0; i < 4; i++) {
+    e1 = ip[0] >> 1;
+    a1 = ip[0] - e1;
+    dest[dest_stride * 0] =
+        highbd_clip_pixel_add(dest[dest_stride * 0], a1, bd);
+    dest[dest_stride * 1] =
+        highbd_clip_pixel_add(dest[dest_stride * 1], e1, bd);
+    dest[dest_stride * 2] =
+        highbd_clip_pixel_add(dest[dest_stride * 2], e1, bd);
+    dest[dest_stride * 3] =
+        highbd_clip_pixel_add(dest[dest_stride * 3], e1, bd);
+    ip++;
+    dest++;
+  }
+}
+
+void vp10_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  tran_low_t step[4];
+  tran_high_t temp1, temp2;
+  (void)bd;
+  // stage 1
+  temp1 = (input[0] + input[2]) * cospi_16_64;
+  temp2 = (input[0] - input[2]) * cospi_16_64;
+  step[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
+  temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
+  step[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  // stage 2
+  output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd);
+  output[1] = HIGHBD_WRAPLOW(step[1] + step[2], bd);
+  output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd);
+  output[3] = HIGHBD_WRAPLOW(step[0] - step[3], bd);
+}
+
+void vp10_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
+                                  int stride, int bd) {
+  tran_low_t out[4 * 4];
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[4], temp_out[4];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  // Rows
+  for (i = 0; i < 4; ++i) {
+    vp10_highbd_idct4_c(input, outptr, bd);
+    input += 4;
+    outptr += 4;
+  }
+
+  // Columns
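+  // The column pass removes the remaining transform gain with a final
+  // rounding shift of 4 bits before clipping to the pixel range.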
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
+    vp10_highbd_idct4_c(temp_in, temp_out, bd);
+    for (j = 0; j < 4; ++j) {
+      dest[j * stride + i] = highbd_clip_pixel_add(
+          dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
+    }
+  }
+}
+
+void vp10_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
+                                 int dest_stride, int bd) {
+  int i;
+  tran_high_t a1;
+  tran_low_t out =
+      HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
+  a1 = ROUND_POWER_OF_TWO(out, 4);
+
+  for (i = 0; i < 4; i++) {
+    dest[0] = highbd_clip_pixel_add(dest[0], a1, bd);
+    dest[1] = highbd_clip_pixel_add(dest[1], a1, bd);
+    dest[2] = highbd_clip_pixel_add(dest[2], a1, bd);
+    dest[3] = highbd_clip_pixel_add(dest[3], a1, bd);
+    dest += dest_stride;
+  }
+}
+
+void vp10_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  tran_low_t step1[8], step2[8];
+  tran_high_t temp1, temp2;
+  // stage 1
+  step1[0] = input[0];
+  step1[2] = input[4];
+  step1[1] = input[2];
+  step1[3] = input[6];
+  temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
+  temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
+  step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
+  temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
+  step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  // stage 2 & stage 3 - even half
+  vp10_highbd_idct4_c(step1, step1, bd);
+
+  // stage 2 - odd half
+  step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+  step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+  step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+  step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
+
+  // stage 3 - odd half
+  step1[4] = step2[4];
+  temp1 = (step2[6] - step2[5]) * cospi_16_64;
+  temp2 = (step2[5] + step2[6]) * cospi_16_64;
+  step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step1[7] = step2[7];
+
+  // stage 4
+  output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+  output[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
+  output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+  output[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
+  output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+  output[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
+  output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+  output[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
+}
+
+void vp10_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
+                                  int stride, int bd) {
+  tran_low_t out[8 * 8];
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[8], temp_out[8];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  // First transform rows.
+  for (i = 0; i < 8; ++i) {
+    vp10_highbd_idct8_c(input, outptr, bd);
+    input += 8;
+    outptr += 8;
+  }
+
+  // Then transform columns.
+  for (i = 0; i < 8; ++i) {
+    for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
+    vp10_highbd_idct8_c(temp_in, temp_out, bd);
+    for (j = 0; j < 8; ++j) {
+      dest[j * stride + i] = highbd_clip_pixel_add(
+          dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
+    }
+  }
+}
+
+void vp10_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,
+                                 int stride, int bd) {
+  int i, j;
+  tran_high_t a1;
+  tran_low_t out =
+      HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
+  a1 = ROUND_POWER_OF_TWO(out, 5);
+  for (j = 0; j < 8; ++j) {
+    for (i = 0; i < 8; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
+    dest += stride;
+  }
+}
+
+void vp10_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+  tran_low_t x0 = input[0];
+  tran_low_t x1 = input[1];
+  tran_low_t x2 = input[2];
+  tran_low_t x3 = input[3];
+  (void)bd;
+
+  if (!(x0 | x1 | x2 | x3)) {
+    memset(output, 0, 4 * sizeof(*output));
+    return;
+  }
+
+  s0 = sinpi_1_9 * x0;
+  s1 = sinpi_2_9 * x0;
+  s2 = sinpi_3_9 * x1;
+  s3 = sinpi_4_9 * x2;
+  s4 = sinpi_1_9 * x2;
+  s5 = sinpi_2_9 * x3;
+  s6 = sinpi_4_9 * x3;
+  s7 = (tran_high_t)HIGHBD_WRAPLOW(x0 - x2 + x3, bd);
+
+  s0 = s0 + s3 + s5;
+  s1 = s1 - s4 - s6;
+  s3 = s2;
+  s2 = sinpi_3_9 * s7;
+
+  // The 1-D transform scaling factor is sqrt(2).
+  // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
+  // + 1b (addition) = 29b.
+  // Hence the output bit depth is 15b.
+  output[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s3), bd);
+  output[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s3), bd);
+  output[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
+  output[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3), bd);
+}
+
+void vp10_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
+
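+  // The iadst consumes its inputs in a permuted order; the results are
+  // reordered and sign-flipped again when written to the output.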
+  tran_low_t x0 = input[7];
+  tran_low_t x1 = input[0];
+  tran_low_t x2 = input[5];
+  tran_low_t x3 = input[2];
+  tran_low_t x4 = input[3];
+  tran_low_t x5 = input[4];
+  tran_low_t x6 = input[1];
+  tran_low_t x7 = input[6];
+  (void)bd;
+
+  if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
+    memset(output, 0, 8 * sizeof(*output));
+    return;
+  }
+
+  // stage 1
+  s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
+  s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
+  s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
+  s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
+  s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
+  s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
+  s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
+  s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
+
+  x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s4), bd);
+  x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s5), bd);
+  x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s6), bd);
+  x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s7), bd);
+  x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s4), bd);
+  x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s5), bd);
+  x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s6), bd);
+  x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s7), bd);
+
+  // stage 2
+  s0 = x0;
+  s1 = x1;
+  s2 = x2;
+  s3 = x3;
+  s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
+  s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
+  s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
+  s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
+
+  x0 = HIGHBD_WRAPLOW(s0 + s2, bd);
+  x1 = HIGHBD_WRAPLOW(s1 + s3, bd);
+  x2 = HIGHBD_WRAPLOW(s0 - s2, bd);
+  x3 = HIGHBD_WRAPLOW(s1 - s3, bd);
+  x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd);
+  x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd);
+  x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd);
+  x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd);
+
+  // stage 3
+  s2 = cospi_16_64 * (x2 + x3);
+  s3 = cospi_16_64 * (x2 - x3);
+  s6 = cospi_16_64 * (x6 + x7);
+  s7 = cospi_16_64 * (x6 - x7);
+
+  x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
+  x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd);
+  x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd);
+  x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd);
+
+  output[0] = HIGHBD_WRAPLOW(x0, bd);
+  output[1] = HIGHBD_WRAPLOW(-x4, bd);
+  output[2] = HIGHBD_WRAPLOW(x6, bd);
+  output[3] = HIGHBD_WRAPLOW(-x2, bd);
+  output[4] = HIGHBD_WRAPLOW(x3, bd);
+  output[5] = HIGHBD_WRAPLOW(-x7, bd);
+  output[6] = HIGHBD_WRAPLOW(x5, bd);
+  output[7] = HIGHBD_WRAPLOW(-x1, bd);
+}
+
+void vp10_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
+                                  int stride, int bd) {
+  tran_low_t out[8 * 8] = { 0 };
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[8], temp_out[8];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  // First transform rows.
+  // Only the first 4 rows have non-zero coefficients.
+  for (i = 0; i < 4; ++i) {
+    vp10_highbd_idct8_c(input, outptr, bd);
+    input += 8;
+    outptr += 8;
+  }
+  // Then transform columns.
+  for (i = 0; i < 8; ++i) {
+    for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
+    vp10_highbd_idct8_c(temp_in, temp_out, bd);
+    for (j = 0; j < 8; ++j) {
+      dest[j * stride + i] = highbd_clip_pixel_add(
+          dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
+    }
+  }
+}
+
+void vp10_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  tran_low_t step1[16], step2[16];
+  tran_high_t temp1, temp2;
+  (void)bd;
+
+  // stage 1
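+  // Load the inputs in bit-reversed order; the indices are written as the
+  // corresponding 32-point positions divided by two.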
+  step1[0] = input[0 / 2];
+  step1[1] = input[16 / 2];
+  step1[2] = input[8 / 2];
+  step1[3] = input[24 / 2];
+  step1[4] = input[4 / 2];
+  step1[5] = input[20 / 2];
+  step1[6] = input[12 / 2];
+  step1[7] = input[28 / 2];
+  step1[8] = input[2 / 2];
+  step1[9] = input[18 / 2];
+  step1[10] = input[10 / 2];
+  step1[11] = input[26 / 2];
+  step1[12] = input[6 / 2];
+  step1[13] = input[22 / 2];
+  step1[14] = input[14 / 2];
+  step1[15] = input[30 / 2];
+
+  // stage 2
+  step2[0] = step1[0];
+  step2[1] = step1[1];
+  step2[2] = step1[2];
+  step2[3] = step1[3];
+  step2[4] = step1[4];
+  step2[5] = step1[5];
+  step2[6] = step1[6];
+  step2[7] = step1[7];
+
+  temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+  temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+  step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+  temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+  step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+  temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+  step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+  temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+  step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  // stage 3
+  step1[0] = step2[0];
+  step1[1] = step2[1];
+  step1[2] = step2[2];
+  step1[3] = step2[3];
+
+  temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+  temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+  step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+  temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+  step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd);
+  step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd);
+  step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd);
+  step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd);
+  step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd);
+  step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd);
+  step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd);
+  step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd);
+
+  // stage 4
+  temp1 = (step1[0] + step1[1]) * cospi_16_64;
+  temp2 = (step1[0] - step1[1]) * cospi_16_64;
+  step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+  temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+  step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+  step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+  step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+  step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
+
+  step2[8] = step1[8];
+  step2[15] = step1[15];
+  temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+  temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+  step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+  temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+  step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step2[11] = step1[11];
+  step2[12] = step1[12];
+
+  // stage 5
+  step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
+  step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
+  step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
+  step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
+  step1[4] = step2[4];
+  temp1 = (step2[6] - step2[5]) * cospi_16_64;
+  temp2 = (step2[5] + step2[6]) * cospi_16_64;
+  step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step1[7] = step2[7];
+
+  step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd);
+  step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd);
+  step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd);
+  step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd);
+  step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd);
+  step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd);
+  step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd);
+  step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd);
+
+  // stage 6
+  step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+  step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
+  step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+  step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
+  step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+  step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
+  step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+  step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
+  step2[8] = step1[8];
+  step2[9] = step1[9];
+  temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+  temp2 = (step1[10] + step1[13]) * cospi_16_64;
+  step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+  temp2 = (step1[11] + step1[12]) * cospi_16_64;
+  step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step2[14] = step1[14];
+  step2[15] = step1[15];
+
+  // stage 7
+  output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd);
+  output[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd);
+  output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd);
+  output[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd);
+  output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd);
+  output[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd);
+  output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd);
+  output[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd);
+  output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd);
+  output[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd);
+  output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd);
+  output[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd);
+  output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd);
+  output[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd);
+  output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd);
+  output[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd);
+}
+
+void vp10_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
+                                     int stride, int bd) {
+  tran_low_t out[16 * 16];
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[16], temp_out[16];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  // First transform rows.
+  for (i = 0; i < 16; ++i) {
+    vp10_highbd_idct16_c(input, outptr, bd);
+    input += 16;
+    outptr += 16;
+  }
+
+  // Then transform columns.
+  for (i = 0; i < 16; ++i) {
+    for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
+    vp10_highbd_idct16_c(temp_in, temp_out, bd);
+    for (j = 0; j < 16; ++j) {
+      dest[j * stride + i] = highbd_clip_pixel_add(
+          dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output,
+                           int bd) {
+  tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
+  tran_high_t s9, s10, s11, s12, s13, s14, s15;
+
+  tran_low_t x0 = input[15];
+  tran_low_t x1 = input[0];
+  tran_low_t x2 = input[13];
+  tran_low_t x3 = input[2];
+  tran_low_t x4 = input[11];
+  tran_low_t x5 = input[4];
+  tran_low_t x6 = input[9];
+  tran_low_t x7 = input[6];
+  tran_low_t x8 = input[7];
+  tran_low_t x9 = input[8];
+  tran_low_t x10 = input[5];
+  tran_low_t x11 = input[10];
+  tran_low_t x12 = input[3];
+  tran_low_t x13 = input[12];
+  tran_low_t x14 = input[1];
+  tran_low_t x15 = input[14];
+  (void)bd;
+
+  if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 |
+        x13 | x14 | x15)) {
+    memset(output, 0, 16 * sizeof(*output));
+    return;
+  }
+
+  // stage 1
+  s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
+  s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
+  s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
+  s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
+  s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
+  s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
+  s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
+  s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
+  s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
+  s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
+  s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
+  s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
+  s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
+  s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
+  s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
+  s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
+
+  x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s8), bd);
+  x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s9), bd);
+  x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s10), bd);
+  x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s11), bd);
+  x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s12), bd);
+  x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s13), bd);
+  x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 + s14), bd);
+  x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 + s15), bd);
+  x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s8), bd);
+  x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s9), bd);
+  x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s10), bd);
+  x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s11), bd);
+  x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s12), bd);
+  x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s13), bd);
+  x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 - s14), bd);
+  x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 - s15), bd);
+
+  // stage 2
+  s0 = x0;
+  s1 = x1;
+  s2 = x2;
+  s3 = x3;
+  s4 = x4;
+  s5 = x5;
+  s6 = x6;
+  s7 = x7;
+  s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
+  s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
+  s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
+  s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
+  s12 = -x12 * cospi_28_64 + x13 * cospi_4_64;
+  s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
+  s14 = -x14 * cospi_12_64 + x15 * cospi_20_64;
+  s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
+
+  x0 = HIGHBD_WRAPLOW(s0 + s4, bd);
+  x1 = HIGHBD_WRAPLOW(s1 + s5, bd);
+  x2 = HIGHBD_WRAPLOW(s2 + s6, bd);
+  x3 = HIGHBD_WRAPLOW(s3 + s7, bd);
+  x4 = HIGHBD_WRAPLOW(s0 - s4, bd);
+  x5 = HIGHBD_WRAPLOW(s1 - s5, bd);
+  x6 = HIGHBD_WRAPLOW(s2 - s6, bd);
+  x7 = HIGHBD_WRAPLOW(s3 - s7, bd);
+  x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 + s12), bd);
+  x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 + s13), bd);
+  x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 + s14), bd);
+  x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 + s15), bd);
+  x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 - s12), bd);
+  x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 - s13), bd);
+  x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 - s14), bd);
+  x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 - s15), bd);
+
+  // stage 3
+  s0 = x0;
+  s1 = x1;
+  s2 = x2;
+  s3 = x3;
+  s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
+  s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
+  s6 = -x6 * cospi_24_64 + x7 * cospi_8_64;
+  s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
+  s8 = x8;
+  s9 = x9;
+  s10 = x10;
+  s11 = x11;
+  s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
+  s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
+  s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
+  s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
+
+  x0 = HIGHBD_WRAPLOW(s0 + s2, bd);
+  x1 = HIGHBD_WRAPLOW(s1 + s3, bd);
+  x2 = HIGHBD_WRAPLOW(s0 - s2, bd);
+  x3 = HIGHBD_WRAPLOW(s1 - s3, bd);
+  x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd);
+  x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd);
+  x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd);
+  x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd);
+  x8 = HIGHBD_WRAPLOW(s8 + s10, bd);
+  x9 = HIGHBD_WRAPLOW(s9 + s11, bd);
+  x10 = HIGHBD_WRAPLOW(s8 - s10, bd);
+  x11 = HIGHBD_WRAPLOW(s9 - s11, bd);
+  x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 + s14), bd);
+  x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 + s15), bd);
+  x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 - s14), bd);
+  x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 - s15), bd);
+
+  // stage 4
+  s2 = (-cospi_16_64) * (x2 + x3);
+  s3 = cospi_16_64 * (x2 - x3);
+  s6 = cospi_16_64 * (x6 + x7);
+  s7 = cospi_16_64 * (-x6 + x7);
+  s10 = cospi_16_64 * (x10 + x11);
+  s11 = cospi_16_64 * (-x10 + x11);
+  s14 = (-cospi_16_64) * (x14 + x15);
+  s15 = cospi_16_64 * (x14 - x15);
+
+  x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
+  x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd);
+  x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd);
+  x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd);
+  x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10), bd);
+  x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11), bd);
+  x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s14), bd);
+  x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s15), bd);
+
+  output[0] = HIGHBD_WRAPLOW(x0, bd);
+  output[1] = HIGHBD_WRAPLOW(-x8, bd);
+  output[2] = HIGHBD_WRAPLOW(x12, bd);
+  output[3] = HIGHBD_WRAPLOW(-x4, bd);
+  output[4] = HIGHBD_WRAPLOW(x6, bd);
+  output[5] = HIGHBD_WRAPLOW(x14, bd);
+  output[6] = HIGHBD_WRAPLOW(x10, bd);
+  output[7] = HIGHBD_WRAPLOW(x2, bd);
+  output[8] = HIGHBD_WRAPLOW(x3, bd);
+  output[9] = HIGHBD_WRAPLOW(x11, bd);
+  output[10] = HIGHBD_WRAPLOW(x15, bd);
+  output[11] = HIGHBD_WRAPLOW(x7, bd);
+  output[12] = HIGHBD_WRAPLOW(x5, bd);
+  output[13] = HIGHBD_WRAPLOW(-x13, bd);
+  output[14] = HIGHBD_WRAPLOW(x9, bd);
+  output[15] = HIGHBD_WRAPLOW(-x1, bd);
+}
+
+void vp10_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
+                                    int stride, int bd) {
+  tran_low_t out[16 * 16] = { 0 };
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[16], temp_out[16];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  // First transform rows. Since all non-zero DCT coefficients are in the
+  // upper-left 4x4 area, only the first 4 rows need to be calculated here.
+  for (i = 0; i < 4; ++i) {
+    vp10_highbd_idct16_c(input, outptr, bd);
+    input += 16;
+    outptr += 16;
+  }
+
+  // Then transform columns.
+  for (i = 0; i < 16; ++i) {
+    for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
+    vp10_highbd_idct16_c(temp_in, temp_out, bd);
+    for (j = 0; j < 16; ++j) {
+      dest[j * stride + i] = highbd_clip_pixel_add(
+          dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
+                                   int stride, int bd) {
+  int i, j;
+  tran_high_t a1;
+  tran_low_t out =
+      HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
+  a1 = ROUND_POWER_OF_TWO(out, 6);
+  for (j = 0; j < 16; ++j) {
+    for (i = 0; i < 16; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
+    dest += stride;
+  }
+}
+
+static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
+                            int bd) {
+  tran_low_t step1[32], step2[32];
+  tran_high_t temp1, temp2;
+  (void)bd;
+
+  // stage 1
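+  // Even-indexed inputs are loaded in bit-reversed order; odd-indexed
+  // inputs enter through the rotation butterflies below.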
+  step1[0] = input[0];
+  step1[1] = input[16];
+  step1[2] = input[8];
+  step1[3] = input[24];
+  step1[4] = input[4];
+  step1[5] = input[20];
+  step1[6] = input[12];
+  step1[7] = input[28];
+  step1[8] = input[2];
+  step1[9] = input[18];
+  step1[10] = input[10];
+  step1[11] = input[26];
+  step1[12] = input[6];
+  step1[13] = input[22];
+  step1[14] = input[14];
+  step1[15] = input[30];
+
+  temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
+  temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
+  step1[16] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[31] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
+  temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
+  step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
+  temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
+  step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
+  temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
+  step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
+  temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
+  step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
+  temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
+  step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
+  temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
+  step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
+  temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
+  step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  // stage 2
+  step2[0] = step1[0];
+  step2[1] = step1[1];
+  step2[2] = step1[2];
+  step2[3] = step1[3];
+  step2[4] = step1[4];
+  step2[5] = step1[5];
+  step2[6] = step1[6];
+  step2[7] = step1[7];
+
+  temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+  temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+  step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+  temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+  step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+  temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+  step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+  temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+  step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[17], bd);
+  step2[17] = HIGHBD_WRAPLOW(step1[16] - step1[17], bd);
+  step2[18] = HIGHBD_WRAPLOW(-step1[18] + step1[19], bd);
+  step2[19] = HIGHBD_WRAPLOW(step1[18] + step1[19], bd);
+  step2[20] = HIGHBD_WRAPLOW(step1[20] + step1[21], bd);
+  step2[21] = HIGHBD_WRAPLOW(step1[20] - step1[21], bd);
+  step2[22] = HIGHBD_WRAPLOW(-step1[22] + step1[23], bd);
+  step2[23] = HIGHBD_WRAPLOW(step1[22] + step1[23], bd);
+  step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[25], bd);
+  step2[25] = HIGHBD_WRAPLOW(step1[24] - step1[25], bd);
+  step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[27], bd);
+  step2[27] = HIGHBD_WRAPLOW(step1[26] + step1[27], bd);
+  step2[28] = HIGHBD_WRAPLOW(step1[28] + step1[29], bd);
+  step2[29] = HIGHBD_WRAPLOW(step1[28] - step1[29], bd);
+  step2[30] = HIGHBD_WRAPLOW(-step1[30] + step1[31], bd);
+  step2[31] = HIGHBD_WRAPLOW(step1[30] + step1[31], bd);
+
+  // stage 3
+  step1[0] = step2[0];
+  step1[1] = step2[1];
+  step1[2] = step2[2];
+  step1[3] = step2[3];
+
+  temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+  temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+  step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+  temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+  step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+  step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd);
+  step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd);
+  step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd);
+  step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd);
+  step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd);
+  step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd);
+  step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd);
+  step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd);
+
+  step1[16] = step2[16];
+  step1[31] = step2[31];
+  temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
+  temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
+  step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
+  temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
+  step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step1[19] = step2[19];
+  step1[20] = step2[20];
+  temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
+  temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
+  step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
+  temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
+  step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step1[23] = step2[23];
+  step1[24] = step2[24];
+  step1[27] = step2[27];
+  step1[28] = step2[28];
+
+  // stage 4
+  temp1 = (step1[0] + step1[1]) * cospi_16_64;
+  temp2 = (step1[0] - step1[1]) * cospi_16_64;
+  step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+  temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+  step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+  step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+  step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+  step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
+
+  step2[8] = step1[8];
+  step2[15] = step1[15];
+  temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+  temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+  step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+  temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+  step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step2[11] = step1[11];
+  step2[12] = step1[12];
+
+  step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[19], bd);
+  step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[18], bd);
+  step2[18] = HIGHBD_WRAPLOW(step1[17] - step1[18], bd);
+  step2[19] = HIGHBD_WRAPLOW(step1[16] - step1[19], bd);
+  step2[20] = HIGHBD_WRAPLOW(-step1[20] + step1[23], bd);
+  step2[21] = HIGHBD_WRAPLOW(-step1[21] + step1[22], bd);
+  step2[22] = HIGHBD_WRAPLOW(step1[21] + step1[22], bd);
+  step2[23] = HIGHBD_WRAPLOW(step1[20] + step1[23], bd);
+
+  step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[27], bd);
+  step2[25] = HIGHBD_WRAPLOW(step1[25] + step1[26], bd);
+  step2[26] = HIGHBD_WRAPLOW(step1[25] - step1[26], bd);
+  step2[27] = HIGHBD_WRAPLOW(step1[24] - step1[27], bd);
+  step2[28] = HIGHBD_WRAPLOW(-step1[28] + step1[31], bd);
+  step2[29] = HIGHBD_WRAPLOW(-step1[29] + step1[30], bd);
+  step2[30] = HIGHBD_WRAPLOW(step1[29] + step1[30], bd);
+  step2[31] = HIGHBD_WRAPLOW(step1[28] + step1[31], bd);
+
+  // stage 5
+  step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
+  step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
+  step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
+  step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
+  step1[4] = step2[4];
+  temp1 = (step2[6] - step2[5]) * cospi_16_64;
+  temp2 = (step2[5] + step2[6]) * cospi_16_64;
+  step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step1[7] = step2[7];
+
+  step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd);
+  step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd);
+  step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd);
+  step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd);
+  step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd);
+  step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd);
+  step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd);
+  step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd);
+
+  step1[16] = step2[16];
+  step1[17] = step2[17];
+  temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
+  temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
+  step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
+  temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
+  step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
+  temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
+  step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
+  temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
+  step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step1[22] = step2[22];
+  step1[23] = step2[23];
+  step1[24] = step2[24];
+  step1[25] = step2[25];
+  step1[30] = step2[30];
+  step1[31] = step2[31];
+
+  // stage 6
+  step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+  step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
+  step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+  step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
+  step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+  step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
+  step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+  step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
+  step2[8] = step1[8];
+  step2[9] = step1[9];
+  temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+  temp2 = (step1[10] + step1[13]) * cospi_16_64;
+  step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+  temp2 = (step1[11] + step1[12]) * cospi_16_64;
+  step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step2[14] = step1[14];
+  step2[15] = step1[15];
+
+  step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[23], bd);
+  step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[22], bd);
+  step2[18] = HIGHBD_WRAPLOW(step1[18] + step1[21], bd);
+  step2[19] = HIGHBD_WRAPLOW(step1[19] + step1[20], bd);
+  step2[20] = HIGHBD_WRAPLOW(step1[19] - step1[20], bd);
+  step2[21] = HIGHBD_WRAPLOW(step1[18] - step1[21], bd);
+  step2[22] = HIGHBD_WRAPLOW(step1[17] - step1[22], bd);
+  step2[23] = HIGHBD_WRAPLOW(step1[16] - step1[23], bd);
+
+  step2[24] = HIGHBD_WRAPLOW(-step1[24] + step1[31], bd);
+  step2[25] = HIGHBD_WRAPLOW(-step1[25] + step1[30], bd);
+  step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[29], bd);
+  step2[27] = HIGHBD_WRAPLOW(-step1[27] + step1[28], bd);
+  step2[28] = HIGHBD_WRAPLOW(step1[27] + step1[28], bd);
+  step2[29] = HIGHBD_WRAPLOW(step1[26] + step1[29], bd);
+  step2[30] = HIGHBD_WRAPLOW(step1[25] + step1[30], bd);
+  step2[31] = HIGHBD_WRAPLOW(step1[24] + step1[31], bd);
+
+  // stage 7
+  step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd);
+  step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd);
+  step1[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd);
+  step1[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd);
+  step1[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd);
+  step1[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd);
+  step1[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd);
+  step1[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd);
+  step1[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd);
+  step1[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd);
+  step1[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd);
+  step1[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd);
+  step1[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd);
+  step1[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd);
+  step1[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd);
+  step1[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd);
+
+  step1[16] = step2[16];
+  step1[17] = step2[17];
+  step1[18] = step2[18];
+  step1[19] = step2[19];
+  temp1 = (-step2[20] + step2[27]) * cospi_16_64;
+  temp2 = (step2[20] + step2[27]) * cospi_16_64;
+  step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = (-step2[21] + step2[26]) * cospi_16_64;
+  temp2 = (step2[21] + step2[26]) * cospi_16_64;
+  step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = (-step2[22] + step2[25]) * cospi_16_64;
+  temp2 = (step2[22] + step2[25]) * cospi_16_64;
+  step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  temp1 = (-step2[23] + step2[24]) * cospi_16_64;
+  temp2 = (step2[23] + step2[24]) * cospi_16_64;
+  step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+  step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+  step1[28] = step2[28];
+  step1[29] = step2[29];
+  step1[30] = step2[30];
+  step1[31] = step2[31];
+
+  // final stage
+  output[0] = HIGHBD_WRAPLOW(step1[0] + step1[31], bd);
+  output[1] = HIGHBD_WRAPLOW(step1[1] + step1[30], bd);
+  output[2] = HIGHBD_WRAPLOW(step1[2] + step1[29], bd);
+  output[3] = HIGHBD_WRAPLOW(step1[3] + step1[28], bd);
+  output[4] = HIGHBD_WRAPLOW(step1[4] + step1[27], bd);
+  output[5] = HIGHBD_WRAPLOW(step1[5] + step1[26], bd);
+  output[6] = HIGHBD_WRAPLOW(step1[6] + step1[25], bd);
+  output[7] = HIGHBD_WRAPLOW(step1[7] + step1[24], bd);
+  output[8] = HIGHBD_WRAPLOW(step1[8] + step1[23], bd);
+  output[9] = HIGHBD_WRAPLOW(step1[9] + step1[22], bd);
+  output[10] = HIGHBD_WRAPLOW(step1[10] + step1[21], bd);
+  output[11] = HIGHBD_WRAPLOW(step1[11] + step1[20], bd);
+  output[12] = HIGHBD_WRAPLOW(step1[12] + step1[19], bd);
+  output[13] = HIGHBD_WRAPLOW(step1[13] + step1[18], bd);
+  output[14] = HIGHBD_WRAPLOW(step1[14] + step1[17], bd);
+  output[15] = HIGHBD_WRAPLOW(step1[15] + step1[16], bd);
+  output[16] = HIGHBD_WRAPLOW(step1[15] - step1[16], bd);
+  output[17] = HIGHBD_WRAPLOW(step1[14] - step1[17], bd);
+  output[18] = HIGHBD_WRAPLOW(step1[13] - step1[18], bd);
+  output[19] = HIGHBD_WRAPLOW(step1[12] - step1[19], bd);
+  output[20] = HIGHBD_WRAPLOW(step1[11] - step1[20], bd);
+  output[21] = HIGHBD_WRAPLOW(step1[10] - step1[21], bd);
+  output[22] = HIGHBD_WRAPLOW(step1[9] - step1[22], bd);
+  output[23] = HIGHBD_WRAPLOW(step1[8] - step1[23], bd);
+  output[24] = HIGHBD_WRAPLOW(step1[7] - step1[24], bd);
+  output[25] = HIGHBD_WRAPLOW(step1[6] - step1[25], bd);
+  output[26] = HIGHBD_WRAPLOW(step1[5] - step1[26], bd);
+  output[27] = HIGHBD_WRAPLOW(step1[4] - step1[27], bd);
+  output[28] = HIGHBD_WRAPLOW(step1[3] - step1[28], bd);
+  output[29] = HIGHBD_WRAPLOW(step1[2] - step1[29], bd);
+  output[30] = HIGHBD_WRAPLOW(step1[1] - step1[30], bd);
+  output[31] = HIGHBD_WRAPLOW(step1[0] - step1[31], bd);
+}
+
+void vp10_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
+                                      int stride, int bd) {
+  tran_low_t out[32 * 32];
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[32], temp_out[32];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  // Rows
+  for (i = 0; i < 32; ++i) {
+    tran_low_t zero_coeff[16];
+    for (j = 0; j < 16; ++j) zero_coeff[j] = input[2 * j] | input[2 * j + 1];
+    for (j = 0; j < 8; ++j)
+      zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
+    for (j = 0; j < 4; ++j)
+      zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
+    for (j = 0; j < 2; ++j)
+      zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
+
+    if (zero_coeff[0] | zero_coeff[1])
+      highbd_idct32_c(input, outptr, bd);
+    else
+      memset(outptr, 0, sizeof(tran_low_t) * 32);
+    input += 32;
+    outptr += 32;
+  }
+
+  // Columns
+  for (i = 0; i < 32; ++i) {
+    for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i];
+    highbd_idct32_c(temp_in, temp_out, bd);
+    for (j = 0; j < 32; ++j) {
+      dest[j * stride + i] = highbd_clip_pixel_add(
+          dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
+                                    int stride, int bd) {
+  tran_low_t out[32 * 32] = { 0 };
+  tran_low_t *outptr = out;
+  int i, j;
+  tran_low_t temp_in[32], temp_out[32];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  // Rows
+  // Only the upper-left 8x8 block has non-zero coefficients.
+  for (i = 0; i < 8; ++i) {
+    highbd_idct32_c(input, outptr, bd);
+    input += 32;
+    outptr += 32;
+  }
+  // Columns
+  for (i = 0; i < 32; ++i) {
+    for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i];
+    highbd_idct32_c(temp_in, temp_out, bd);
+    for (j = 0; j < 32; ++j) {
+      dest[j * stride + i] = highbd_clip_pixel_add(
+          dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
+                                   int stride, int bd) {
+  int i, j;
+  int a1;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  tran_low_t out =
+      HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+  out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
+  a1 = ROUND_POWER_OF_TWO(out, 6);
+
+  for (j = 0; j < 32; ++j) {
+    for (i = 0; i < 32; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
+    dest += stride;
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/vp10_inv_txfm.h b/av1/common/vp10_inv_txfm.h
new file mode 100644
index 0000000..b53db48
--- /dev/null
+++ b/av1/common/vp10_inv_txfm.h
@@ -0,0 +1,132 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_INV_TXFM_H_
+#define VPX_DSP_INV_TXFM_H_
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/inv_txfm.h"
+#include "aom_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static INLINE tran_high_t check_range(tran_high_t input) {
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+  // For valid input streams, intermediate stage coefficients should always
+  // stay within the range of a signed 16-bit integer. Coefficients can go
+  // out of this range for invalid/corrupt streams. However, strictly
+  // checking this range for every intermediate coefficient can be burdensome
+  // for a decoder, so the following assertion is only enabled when configured
+  // with --enable-coefficient-range-checking.
+  assert(INT16_MIN <= input);
+  assert(input <= INT16_MAX);
+#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
+  return input;
+}
+
+static INLINE tran_high_t dct_const_round_shift(tran_high_t input) {
+  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+  return rv;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE tran_high_t highbd_check_range(tran_high_t input, int bd) {
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+  // For valid highbitdepth streams, intermediate stage coefficients will
+  // stay within the following ranges:
+  // - 8 bit: signed 16-bit integer
+  // - 10 bit: signed 18-bit integer
+  // - 12 bit: signed 20-bit integer
+  const int32_t int_max = (1 << (7 + bd)) - 1;
+  const int32_t int_min = -int_max - 1;
+  assert(int_min <= input);
+  assert(input <= int_max);
+  (void)int_min;
+#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
+  (void)bd;
+  return input;
+}
+
+static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) {
+  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+  return rv;
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_EMULATE_HARDWARE
+// When CONFIG_EMULATE_HARDWARE is 1, the transform uses a non-normative
+// method to handle overflows. A stream that causes overflows in the inverse
+// transform is considered invalid, and a hardware implementer is free to
+// choose any reasonable method to handle them. However, to aid hardware
+// verification, implementers can use an implementation of the WRAPLOW()
+// macro below that is identical to their intended hardware behavior (and
+// also use configure options to trigger the C implementation of the
+// transform).
+//
+// The particular WRAPLOW implementation below performs strict overflow
+// wrapping to match common hardware implementations:
+// bd of 8 uses tran_low_t with 16 bits, so 16 bits must be removed;
+// bd of 10 uses tran_low_t with 18 bits, so 14 bits must be removed;
+// bd of 12 uses tran_low_t with 20 bits, so 12 bits must be removed;
+// bd of x uses tran_low_t with 8+x bits, so 24-x bits must be removed.
+
+#define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16)
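+// For example, WRAPLOW(0x12345) == 0x2345: the value is truncated to the
+// low 16 bits and then sign-extended.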
+#if CONFIG_VP9_HIGHBITDEPTH
+#define HIGHBD_WRAPLOW(x, bd) \
+  ((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd))
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#else  // CONFIG_EMULATE_HARDWARE
+
+#define WRAPLOW(x) ((int32_t)check_range(x))
+#if CONFIG_VP9_HIGHBITDEPTH
+#define HIGHBD_WRAPLOW(x, bd) ((int32_t)highbd_check_range((x), bd))
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#endif  // CONFIG_EMULATE_HARDWARE
+
+void vp10_idct4_c(const tran_low_t *input, tran_low_t *output);
+void vp10_idct8_c(const tran_low_t *input, tran_low_t *output);
+void vp10_idct16_c(const tran_low_t *input, tran_low_t *output);
+void vp10_idct32_c(const tran_low_t *input, tran_low_t *output);
+void vp10_iadst4_c(const tran_low_t *input, tran_low_t *output);
+void vp10_iadst8_c(const tran_low_t *input, tran_low_t *output);
+void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vp10_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vp10_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);
+
+void vp10_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vp10_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vp10_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);
+
+static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
+                                             int bd) {
+  trans = HIGHBD_WRAPLOW(trans, bd);
+  return clip_pixel_highbd(dest + (int)trans, bd);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
+  trans = WRAPLOW(trans);
+  return clip_pixel(dest + (int)trans);
+}
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+#endif  // VPX_DSP_INV_TXFM_H_
diff --git a/av1/common/vp10_inv_txfm1d.c b/av1/common/vp10_inv_txfm1d.c
new file mode 100644
index 0000000..76fb623
--- /dev/null
+++ b/av1/common/vp10_inv_txfm1d.c
@@ -0,0 +1,1537 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include "av1/common/vp10_inv_txfm1d.h"
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
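+// Debug-only variant: asserts that every intermediate coefficient fits in
+// the per-stage bit budget and dumps the offending input vector on failure.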
+#define range_check(stage, input, buf, size, bit)                         \
+  {                                                                       \
+    int i, j;                                                             \
+    for (i = 0; i < size; ++i) {                                          \
+      int buf_bit = get_max_bit(abs(buf[i])) + 1;                         \
+      if (buf_bit > bit) {                                                \
+        printf("======== %s %d overflow ========\n", __FILE__, __LINE__); \
+        printf("stage: %d node: %d\n", stage, i);                         \
+        printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \
+        printf("input:\n");                                               \
+        for (j = 0; j < size; j++) {                                      \
+          printf("%d,", input[j]);                                        \
+        }                                                                 \
+        printf("\n");                                                     \
+        assert(0);                                                        \
+      }                                                                   \
+    }                                                                     \
+  }
+#else
+#define range_check(stage, input, buf, size, bit) \
+  {                                               \
+    (void) stage;                                 \
+    (void) input;                                 \
+    (void) buf;                                   \
+    (void) size;                                  \
+    (void) bit;                                   \
+  }
+#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
+
+void vp10_idct4_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 4;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[4];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0];
+  bf1[1] = input[2];
+  bf1[2] = input[1];
+  bf1[3] = input[3];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[3];
+  bf1[1] = bf0[1] + bf0[2];
+  bf1[2] = bf0[1] - bf0[2];
+  bf1[3] = bf0[0] - bf0[3];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
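+// vp10_idct{4,8,16,32}_new() all share the same shape: stage 1 performs
+// the bit-reversed input permutation, followed by 2 * (log2(size) - 1)
+// butterfly stages. The stages ping-pong between the caller's output[]
+// and the local step[]; because the total stage count 2 * log2(size) - 1
+// is odd, the last stage always writes back into output[].
+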
+void vp10_idct8_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 8;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[8];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0];
+  bf1[1] = input[4];
+  bf1[2] = input[2];
+  bf1[3] = input[6];
+  bf1[4] = input[1];
+  bf1[5] = input[5];
+  bf1[6] = input[3];
+  bf1[7] = input[7];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+  bf1[4] = bf0[4] + bf0[5];
+  bf1[5] = bf0[4] - bf0[5];
+  bf1[6] = -bf0[6] + bf0[7];
+  bf1[7] = bf0[6] + bf0[7];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[3];
+  bf1[1] = bf0[1] + bf0[2];
+  bf1[2] = bf0[1] - bf0[2];
+  bf1[3] = bf0[0] - bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[7] = bf0[7];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[7];
+  bf1[1] = bf0[1] + bf0[6];
+  bf1[2] = bf0[2] + bf0[5];
+  bf1[3] = bf0[3] + bf0[4];
+  bf1[4] = bf0[3] - bf0[4];
+  bf1[5] = bf0[2] - bf0[5];
+  bf1[6] = bf0[1] - bf0[6];
+  bf1[7] = bf0[0] - bf0[7];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_idct16_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 16;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[16];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0];
+  bf1[1] = input[8];
+  bf1[2] = input[4];
+  bf1[3] = input[12];
+  bf1[4] = input[2];
+  bf1[5] = input[10];
+  bf1[6] = input[6];
+  bf1[7] = input[14];
+  bf1[8] = input[1];
+  bf1[9] = input[9];
+  bf1[10] = input[5];
+  bf1[11] = input[13];
+  bf1[12] = input[3];
+  bf1[13] = input[11];
+  bf1[14] = input[7];
+  bf1[15] = input[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
+  bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+  bf1[8] = bf0[8] + bf0[9];
+  bf1[9] = bf0[8] - bf0[9];
+  bf1[10] = -bf0[10] + bf0[11];
+  bf1[11] = bf0[10] + bf0[11];
+  bf1[12] = bf0[12] + bf0[13];
+  bf1[13] = bf0[12] - bf0[13];
+  bf1[14] = -bf0[14] + bf0[15];
+  bf1[15] = bf0[14] + bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+  bf1[4] = bf0[4] + bf0[5];
+  bf1[5] = bf0[4] - bf0[5];
+  bf1[6] = -bf0[6] + bf0[7];
+  bf1[7] = bf0[6] + bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+  bf1[15] = bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[3];
+  bf1[1] = bf0[1] + bf0[2];
+  bf1[2] = bf0[1] - bf0[2];
+  bf1[3] = bf0[0] - bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8] + bf0[11];
+  bf1[9] = bf0[9] + bf0[10];
+  bf1[10] = bf0[9] - bf0[10];
+  bf1[11] = bf0[8] - bf0[11];
+  bf1[12] = -bf0[12] + bf0[15];
+  bf1[13] = -bf0[13] + bf0[14];
+  bf1[14] = bf0[13] + bf0[14];
+  bf1[15] = bf0[12] + bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[7];
+  bf1[1] = bf0[1] + bf0[6];
+  bf1[2] = bf0[2] + bf0[5];
+  bf1[3] = bf0[3] + bf0[4];
+  bf1[4] = bf0[3] - bf0[4];
+  bf1[5] = bf0[2] - bf0[5];
+  bf1[6] = bf0[1] - bf0[6];
+  bf1[7] = bf0[0] - bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[15];
+  bf1[1] = bf0[1] + bf0[14];
+  bf1[2] = bf0[2] + bf0[13];
+  bf1[3] = bf0[3] + bf0[12];
+  bf1[4] = bf0[4] + bf0[11];
+  bf1[5] = bf0[5] + bf0[10];
+  bf1[6] = bf0[6] + bf0[9];
+  bf1[7] = bf0[7] + bf0[8];
+  bf1[8] = bf0[7] - bf0[8];
+  bf1[9] = bf0[6] - bf0[9];
+  bf1[10] = bf0[5] - bf0[10];
+  bf1[11] = bf0[4] - bf0[11];
+  bf1[12] = bf0[3] - bf0[12];
+  bf1[13] = bf0[2] - bf0[13];
+  bf1[14] = bf0[1] - bf0[14];
+  bf1[15] = bf0[0] - bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_idct32_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 32;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[32];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0];
+  bf1[1] = input[16];
+  bf1[2] = input[8];
+  bf1[3] = input[24];
+  bf1[4] = input[4];
+  bf1[5] = input[20];
+  bf1[6] = input[12];
+  bf1[7] = input[28];
+  bf1[8] = input[2];
+  bf1[9] = input[18];
+  bf1[10] = input[10];
+  bf1[11] = input[26];
+  bf1[12] = input[6];
+  bf1[13] = input[22];
+  bf1[14] = input[14];
+  bf1[15] = input[30];
+  bf1[16] = input[1];
+  bf1[17] = input[17];
+  bf1[18] = input[9];
+  bf1[19] = input[25];
+  bf1[20] = input[5];
+  bf1[21] = input[21];
+  bf1[22] = input[13];
+  bf1[23] = input[29];
+  bf1[24] = input[3];
+  bf1[25] = input[19];
+  bf1[26] = input[11];
+  bf1[27] = input[27];
+  bf1[28] = input[7];
+  bf1[29] = input[23];
+  bf1[30] = input[15];
+  bf1[31] = input[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]);
+  bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]);
+  bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]);
+  bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]);
+  bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]);
+  bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]);
+  bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]);
+  bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]);
+  bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
+  bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
+  bf1[16] = bf0[16] + bf0[17];
+  bf1[17] = bf0[16] - bf0[17];
+  bf1[18] = -bf0[18] + bf0[19];
+  bf1[19] = bf0[18] + bf0[19];
+  bf1[20] = bf0[20] + bf0[21];
+  bf1[21] = bf0[20] - bf0[21];
+  bf1[22] = -bf0[22] + bf0[23];
+  bf1[23] = bf0[22] + bf0[23];
+  bf1[24] = bf0[24] + bf0[25];
+  bf1[25] = bf0[24] - bf0[25];
+  bf1[26] = -bf0[26] + bf0[27];
+  bf1[27] = bf0[26] + bf0[27];
+  bf1[28] = bf0[28] + bf0[29];
+  bf1[29] = bf0[28] - bf0[29];
+  bf1[30] = -bf0[30] + bf0[31];
+  bf1[31] = bf0[30] + bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+  bf1[8] = bf0[8] + bf0[9];
+  bf1[9] = bf0[8] - bf0[9];
+  bf1[10] = -bf0[10] + bf0[11];
+  bf1[11] = bf0[10] + bf0[11];
+  bf1[12] = bf0[12] + bf0[13];
+  bf1[13] = bf0[12] - bf0[13];
+  bf1[14] = -bf0[14] + bf0[15];
+  bf1[15] = bf0[14] + bf0[15];
+  bf1[16] = bf0[16];
+  bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+  bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+  bf1[19] = bf0[19];
+  bf1[20] = bf0[20];
+  bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+  bf1[23] = bf0[23];
+  bf1[24] = bf0[24];
+  bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]);
+  bf1[27] = bf0[27];
+  bf1[28] = bf0[28];
+  bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]);
+  bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]);
+  bf1[31] = bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+  bf1[4] = bf0[4] + bf0[5];
+  bf1[5] = bf0[4] - bf0[5];
+  bf1[6] = -bf0[6] + bf0[7];
+  bf1[7] = bf0[6] + bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+  bf1[15] = bf0[15];
+  bf1[16] = bf0[16] + bf0[19];
+  bf1[17] = bf0[17] + bf0[18];
+  bf1[18] = bf0[17] - bf0[18];
+  bf1[19] = bf0[16] - bf0[19];
+  bf1[20] = -bf0[20] + bf0[23];
+  bf1[21] = -bf0[21] + bf0[22];
+  bf1[22] = bf0[21] + bf0[22];
+  bf1[23] = bf0[20] + bf0[23];
+  bf1[24] = bf0[24] + bf0[27];
+  bf1[25] = bf0[25] + bf0[26];
+  bf1[26] = bf0[25] - bf0[26];
+  bf1[27] = bf0[24] - bf0[27];
+  bf1[28] = -bf0[28] + bf0[31];
+  bf1[29] = -bf0[29] + bf0[30];
+  bf1[30] = bf0[29] + bf0[30];
+  bf1[31] = bf0[28] + bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[3];
+  bf1[1] = bf0[1] + bf0[2];
+  bf1[2] = bf0[1] - bf0[2];
+  bf1[3] = bf0[0] - bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8] + bf0[11];
+  bf1[9] = bf0[9] + bf0[10];
+  bf1[10] = bf0[9] - bf0[10];
+  bf1[11] = bf0[8] - bf0[11];
+  bf1[12] = -bf0[12] + bf0[15];
+  bf1[13] = -bf0[13] + bf0[14];
+  bf1[14] = bf0[13] + bf0[14];
+  bf1[15] = bf0[12] + bf0[15];
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+  bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+  bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+  bf1[22] = bf0[22];
+  bf1[23] = bf0[23];
+  bf1[24] = bf0[24];
+  bf1[25] = bf0[25];
+  bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]);
+  bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]);
+  bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]);
+  bf1[30] = bf0[30];
+  bf1[31] = bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[7];
+  bf1[1] = bf0[1] + bf0[6];
+  bf1[2] = bf0[2] + bf0[5];
+  bf1[3] = bf0[3] + bf0[4];
+  bf1[4] = bf0[3] - bf0[4];
+  bf1[5] = bf0[2] - bf0[5];
+  bf1[6] = bf0[1] - bf0[6];
+  bf1[7] = bf0[0] - bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = bf0[16] + bf0[23];
+  bf1[17] = bf0[17] + bf0[22];
+  bf1[18] = bf0[18] + bf0[21];
+  bf1[19] = bf0[19] + bf0[20];
+  bf1[20] = bf0[19] - bf0[20];
+  bf1[21] = bf0[18] - bf0[21];
+  bf1[22] = bf0[17] - bf0[22];
+  bf1[23] = bf0[16] - bf0[23];
+  bf1[24] = -bf0[24] + bf0[31];
+  bf1[25] = -bf0[25] + bf0[30];
+  bf1[26] = -bf0[26] + bf0[29];
+  bf1[27] = -bf0[27] + bf0[28];
+  bf1[28] = bf0[27] + bf0[28];
+  bf1[29] = bf0[26] + bf0[29];
+  bf1[30] = bf0[25] + bf0[30];
+  bf1[31] = bf0[24] + bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 8
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[15];
+  bf1[1] = bf0[1] + bf0[14];
+  bf1[2] = bf0[2] + bf0[13];
+  bf1[3] = bf0[3] + bf0[12];
+  bf1[4] = bf0[4] + bf0[11];
+  bf1[5] = bf0[5] + bf0[10];
+  bf1[6] = bf0[6] + bf0[9];
+  bf1[7] = bf0[7] + bf0[8];
+  bf1[8] = bf0[7] - bf0[8];
+  bf1[9] = bf0[6] - bf0[9];
+  bf1[10] = bf0[5] - bf0[10];
+  bf1[11] = bf0[4] - bf0[11];
+  bf1[12] = bf0[3] - bf0[12];
+  bf1[13] = bf0[2] - bf0[13];
+  bf1[14] = bf0[1] - bf0[14];
+  bf1[15] = bf0[0] - bf0[15];
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = bf0[18];
+  bf1[19] = bf0[19];
+  bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+  bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+  bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+  bf1[28] = bf0[28];
+  bf1[29] = bf0[29];
+  bf1[30] = bf0[30];
+  bf1[31] = bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 9
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[31];
+  bf1[1] = bf0[1] + bf0[30];
+  bf1[2] = bf0[2] + bf0[29];
+  bf1[3] = bf0[3] + bf0[28];
+  bf1[4] = bf0[4] + bf0[27];
+  bf1[5] = bf0[5] + bf0[26];
+  bf1[6] = bf0[6] + bf0[25];
+  bf1[7] = bf0[7] + bf0[24];
+  bf1[8] = bf0[8] + bf0[23];
+  bf1[9] = bf0[9] + bf0[22];
+  bf1[10] = bf0[10] + bf0[21];
+  bf1[11] = bf0[11] + bf0[20];
+  bf1[12] = bf0[12] + bf0[19];
+  bf1[13] = bf0[13] + bf0[18];
+  bf1[14] = bf0[14] + bf0[17];
+  bf1[15] = bf0[15] + bf0[16];
+  bf1[16] = bf0[15] - bf0[16];
+  bf1[17] = bf0[14] - bf0[17];
+  bf1[18] = bf0[13] - bf0[18];
+  bf1[19] = bf0[12] - bf0[19];
+  bf1[20] = bf0[11] - bf0[20];
+  bf1[21] = bf0[10] - bf0[21];
+  bf1[22] = bf0[9] - bf0[22];
+  bf1[23] = bf0[8] - bf0[23];
+  bf1[24] = bf0[7] - bf0[24];
+  bf1[25] = bf0[6] - bf0[25];
+  bf1[26] = bf0[5] - bf0[26];
+  bf1[27] = bf0[4] - bf0[27];
+  bf1[28] = bf0[3] - bf0[28];
+  bf1[29] = bf0[2] - bf0[29];
+  bf1[30] = bf0[1] - bf0[30];
+  bf1[31] = bf0[0] - bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst4_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 4;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[4];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0];
+  bf1[1] = -input[3];
+  bf1[2] = -input[1];
+  bf1[3] = input[2];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[2];
+  bf1[1] = bf0[1] + bf0[3];
+  bf1[2] = bf0[0] - bf0[2];
+  bf1[3] = bf0[1] - bf0[3];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(cospi[56], bf0[0], -cospi[8], bf0[1], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[24], bf0[2], -cospi[40], bf0[3], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[1];
+  bf1[1] = bf0[2];
+  bf1[2] = bf0[3];
+  bf1[3] = bf0[0];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
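+// The vp10_iadst{4,8,16,32}_new() routines use the same output[]/step[]
+// ping-pong as the iDCTs above: stage 1 applies an input permutation
+// together with sign flips, the middle stages reuse the half_btf()
+// rotations, and the final stage is a pure output permutation.
+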
+void vp10_iadst8_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 8;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[8];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0];
+  bf1[1] = -input[7];
+  bf1[2] = -input[3];
+  bf1[3] = input[4];
+  bf1[4] = -input[1];
+  bf1[5] = input[6];
+  bf1[6] = input[2];
+  bf1[7] = -input[5];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[2];
+  bf1[1] = bf0[1] + bf0[3];
+  bf1[2] = bf0[0] - bf0[2];
+  bf1[3] = bf0[1] - bf0[3];
+  bf1[4] = bf0[4] + bf0[6];
+  bf1[5] = bf0[5] + bf0[7];
+  bf1[6] = bf0[4] - bf0[6];
+  bf1[7] = bf0[5] - bf0[7];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[4];
+  bf1[1] = bf0[1] + bf0[5];
+  bf1[2] = bf0[2] + bf0[6];
+  bf1[3] = bf0[3] + bf0[7];
+  bf1[4] = bf0[0] - bf0[4];
+  bf1[5] = bf0[1] - bf0[5];
+  bf1[6] = bf0[2] - bf0[6];
+  bf1[7] = bf0[3] - bf0[7];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit[stage]);
+  bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[1];
+  bf1[1] = bf0[6];
+  bf1[2] = bf0[3];
+  bf1[3] = bf0[4];
+  bf1[4] = bf0[5];
+  bf1[5] = bf0[2];
+  bf1[6] = bf0[7];
+  bf1[7] = bf0[0];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst16_new(const int32_t *input, int32_t *output,
+                      const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 16;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[16];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0];
+  bf1[1] = -input[15];
+  bf1[2] = -input[7];
+  bf1[3] = input[8];
+  bf1[4] = -input[3];
+  bf1[5] = input[12];
+  bf1[6] = input[4];
+  bf1[7] = -input[11];
+  bf1[8] = -input[1];
+  bf1[9] = input[14];
+  bf1[10] = input[6];
+  bf1[11] = -input[9];
+  bf1[12] = input[2];
+  bf1[13] = -input[13];
+  bf1[14] = -input[5];
+  bf1[15] = input[10];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[2];
+  bf1[1] = bf0[1] + bf0[3];
+  bf1[2] = bf0[0] - bf0[2];
+  bf1[3] = bf0[1] - bf0[3];
+  bf1[4] = bf0[4] + bf0[6];
+  bf1[5] = bf0[5] + bf0[7];
+  bf1[6] = bf0[4] - bf0[6];
+  bf1[7] = bf0[5] - bf0[7];
+  bf1[8] = bf0[8] + bf0[10];
+  bf1[9] = bf0[9] + bf0[11];
+  bf1[10] = bf0[8] - bf0[10];
+  bf1[11] = bf0[9] - bf0[11];
+  bf1[12] = bf0[12] + bf0[14];
+  bf1[13] = bf0[13] + bf0[15];
+  bf1[14] = bf0[12] - bf0[14];
+  bf1[15] = bf0[13] - bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[4];
+  bf1[1] = bf0[1] + bf0[5];
+  bf1[2] = bf0[2] + bf0[6];
+  bf1[3] = bf0[3] + bf0[7];
+  bf1[4] = bf0[0] - bf0[4];
+  bf1[5] = bf0[1] - bf0[5];
+  bf1[6] = bf0[2] - bf0[6];
+  bf1[7] = bf0[3] - bf0[7];
+  bf1[8] = bf0[8] + bf0[12];
+  bf1[9] = bf0[9] + bf0[13];
+  bf1[10] = bf0[10] + bf0[14];
+  bf1[11] = bf0[11] + bf0[15];
+  bf1[12] = bf0[8] - bf0[12];
+  bf1[13] = bf0[9] - bf0[13];
+  bf1[14] = bf0[10] - bf0[14];
+  bf1[15] = bf0[11] - bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+  bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
+  bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[8];
+  bf1[1] = bf0[1] + bf0[9];
+  bf1[2] = bf0[2] + bf0[10];
+  bf1[3] = bf0[3] + bf0[11];
+  bf1[4] = bf0[4] + bf0[12];
+  bf1[5] = bf0[5] + bf0[13];
+  bf1[6] = bf0[6] + bf0[14];
+  bf1[7] = bf0[7] + bf0[15];
+  bf1[8] = bf0[0] - bf0[8];
+  bf1[9] = bf0[1] - bf0[9];
+  bf1[10] = bf0[2] - bf0[10];
+  bf1[11] = bf0[3] - bf0[11];
+  bf1[12] = bf0[4] - bf0[12];
+  bf1[13] = bf0[5] - bf0[13];
+  bf1[14] = bf0[6] - bf0[14];
+  bf1[15] = bf0[7] - bf0[15];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 8
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit[stage]);
+  bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit[stage]);
+  bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
+  bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 9
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[1];
+  bf1[1] = bf0[14];
+  bf1[2] = bf0[3];
+  bf1[3] = bf0[12];
+  bf1[4] = bf0[5];
+  bf1[5] = bf0[10];
+  bf1[6] = bf0[7];
+  bf1[7] = bf0[8];
+  bf1[8] = bf0[9];
+  bf1[9] = bf0[6];
+  bf1[10] = bf0[11];
+  bf1[11] = bf0[4];
+  bf1[12] = bf0[13];
+  bf1[13] = bf0[2];
+  bf1[14] = bf0[15];
+  bf1[15] = bf0[0];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst32_new(const int32_t *input, int32_t *output,
+                      const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 32;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[32];
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  bf1 = output;
+  bf1[0] = input[0];
+  bf1[1] = -input[31];
+  bf1[2] = -input[15];
+  bf1[3] = input[16];
+  bf1[4] = -input[7];
+  bf1[5] = input[24];
+  bf1[6] = input[8];
+  bf1[7] = -input[23];
+  bf1[8] = -input[3];
+  bf1[9] = input[28];
+  bf1[10] = input[12];
+  bf1[11] = -input[19];
+  bf1[12] = input[4];
+  bf1[13] = -input[27];
+  bf1[14] = -input[11];
+  bf1[15] = input[20];
+  bf1[16] = -input[1];
+  bf1[17] = input[30];
+  bf1[18] = input[14];
+  bf1[19] = -input[17];
+  bf1[20] = input[6];
+  bf1[21] = -input[25];
+  bf1[22] = -input[9];
+  bf1[23] = input[22];
+  bf1[24] = input[2];
+  bf1[25] = -input[29];
+  bf1[26] = -input[13];
+  bf1[27] = input[18];
+  bf1[28] = -input[5];
+  bf1[29] = input[26];
+  bf1[30] = input[10];
+  bf1[31] = -input[21];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
+  bf1[19] = half_btf(cospi[32], bf0[18], -cospi[32], bf0[19], cos_bit[stage]);
+  bf1[20] = bf0[20];
+  bf1[21] = bf0[21];
+  bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
+  bf1[23] = half_btf(cospi[32], bf0[22], -cospi[32], bf0[23], cos_bit[stage]);
+  bf1[24] = bf0[24];
+  bf1[25] = bf0[25];
+  bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[32], bf0[26], -cospi[32], bf0[27], cos_bit[stage]);
+  bf1[28] = bf0[28];
+  bf1[29] = bf0[29];
+  bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[32], bf0[30], -cospi[32], bf0[31], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[2];
+  bf1[1] = bf0[1] + bf0[3];
+  bf1[2] = bf0[0] - bf0[2];
+  bf1[3] = bf0[1] - bf0[3];
+  bf1[4] = bf0[4] + bf0[6];
+  bf1[5] = bf0[5] + bf0[7];
+  bf1[6] = bf0[4] - bf0[6];
+  bf1[7] = bf0[5] - bf0[7];
+  bf1[8] = bf0[8] + bf0[10];
+  bf1[9] = bf0[9] + bf0[11];
+  bf1[10] = bf0[8] - bf0[10];
+  bf1[11] = bf0[9] - bf0[11];
+  bf1[12] = bf0[12] + bf0[14];
+  bf1[13] = bf0[13] + bf0[15];
+  bf1[14] = bf0[12] - bf0[14];
+  bf1[15] = bf0[13] - bf0[15];
+  bf1[16] = bf0[16] + bf0[18];
+  bf1[17] = bf0[17] + bf0[19];
+  bf1[18] = bf0[16] - bf0[18];
+  bf1[19] = bf0[17] - bf0[19];
+  bf1[20] = bf0[20] + bf0[22];
+  bf1[21] = bf0[21] + bf0[23];
+  bf1[22] = bf0[20] - bf0[22];
+  bf1[23] = bf0[21] - bf0[23];
+  bf1[24] = bf0[24] + bf0[26];
+  bf1[25] = bf0[25] + bf0[27];
+  bf1[26] = bf0[24] - bf0[26];
+  bf1[27] = bf0[25] - bf0[27];
+  bf1[28] = bf0[28] + bf0[30];
+  bf1[29] = bf0[29] + bf0[31];
+  bf1[30] = bf0[28] - bf0[30];
+  bf1[31] = bf0[29] - bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = bf0[18];
+  bf1[19] = bf0[19];
+  bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
+  bf1[21] = half_btf(cospi[48], bf0[20], -cospi[16], bf0[21], cos_bit[stage]);
+  bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
+  bf1[23] = half_btf(cospi[16], bf0[22], cospi[48], bf0[23], cos_bit[stage]);
+  bf1[24] = bf0[24];
+  bf1[25] = bf0[25];
+  bf1[26] = bf0[26];
+  bf1[27] = bf0[27];
+  bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[48], bf0[28], -cospi[16], bf0[29], cos_bit[stage]);
+  bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[16], bf0[30], cospi[48], bf0[31], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[4];
+  bf1[1] = bf0[1] + bf0[5];
+  bf1[2] = bf0[2] + bf0[6];
+  bf1[3] = bf0[3] + bf0[7];
+  bf1[4] = bf0[0] - bf0[4];
+  bf1[5] = bf0[1] - bf0[5];
+  bf1[6] = bf0[2] - bf0[6];
+  bf1[7] = bf0[3] - bf0[7];
+  bf1[8] = bf0[8] + bf0[12];
+  bf1[9] = bf0[9] + bf0[13];
+  bf1[10] = bf0[10] + bf0[14];
+  bf1[11] = bf0[11] + bf0[15];
+  bf1[12] = bf0[8] - bf0[12];
+  bf1[13] = bf0[9] - bf0[13];
+  bf1[14] = bf0[10] - bf0[14];
+  bf1[15] = bf0[11] - bf0[15];
+  bf1[16] = bf0[16] + bf0[20];
+  bf1[17] = bf0[17] + bf0[21];
+  bf1[18] = bf0[18] + bf0[22];
+  bf1[19] = bf0[19] + bf0[23];
+  bf1[20] = bf0[16] - bf0[20];
+  bf1[21] = bf0[17] - bf0[21];
+  bf1[22] = bf0[18] - bf0[22];
+  bf1[23] = bf0[19] - bf0[23];
+  bf1[24] = bf0[24] + bf0[28];
+  bf1[25] = bf0[25] + bf0[29];
+  bf1[26] = bf0[26] + bf0[30];
+  bf1[27] = bf0[27] + bf0[31];
+  bf1[28] = bf0[24] - bf0[28];
+  bf1[29] = bf0[25] - bf0[29];
+  bf1[30] = bf0[26] - bf0[30];
+  bf1[31] = bf0[27] - bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+  bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
+  bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = bf0[18];
+  bf1[19] = bf0[19];
+  bf1[20] = bf0[20];
+  bf1[21] = bf0[21];
+  bf1[22] = bf0[22];
+  bf1[23] = bf0[23];
+  bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[56], bf0[24], -cospi[8], bf0[25], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[24], bf0[26], -cospi[40], bf0[27], cos_bit[stage]);
+  bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[8], bf0[28], cospi[56], bf0[29], cos_bit[stage]);
+  bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[40], bf0[30], cospi[24], bf0[31], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[8];
+  bf1[1] = bf0[1] + bf0[9];
+  bf1[2] = bf0[2] + bf0[10];
+  bf1[3] = bf0[3] + bf0[11];
+  bf1[4] = bf0[4] + bf0[12];
+  bf1[5] = bf0[5] + bf0[13];
+  bf1[6] = bf0[6] + bf0[14];
+  bf1[7] = bf0[7] + bf0[15];
+  bf1[8] = bf0[0] - bf0[8];
+  bf1[9] = bf0[1] - bf0[9];
+  bf1[10] = bf0[2] - bf0[10];
+  bf1[11] = bf0[3] - bf0[11];
+  bf1[12] = bf0[4] - bf0[12];
+  bf1[13] = bf0[5] - bf0[13];
+  bf1[14] = bf0[6] - bf0[14];
+  bf1[15] = bf0[7] - bf0[15];
+  bf1[16] = bf0[16] + bf0[24];
+  bf1[17] = bf0[17] + bf0[25];
+  bf1[18] = bf0[18] + bf0[26];
+  bf1[19] = bf0[19] + bf0[27];
+  bf1[20] = bf0[20] + bf0[28];
+  bf1[21] = bf0[21] + bf0[29];
+  bf1[22] = bf0[22] + bf0[30];
+  bf1[23] = bf0[23] + bf0[31];
+  bf1[24] = bf0[16] - bf0[24];
+  bf1[25] = bf0[17] - bf0[25];
+  bf1[26] = bf0[18] - bf0[26];
+  bf1[27] = bf0[19] - bf0[27];
+  bf1[28] = bf0[20] - bf0[28];
+  bf1[29] = bf0[21] - bf0[29];
+  bf1[30] = bf0[22] - bf0[30];
+  bf1[31] = bf0[23] - bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 8
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
+  bf1[17] = half_btf(cospi[60], bf0[16], -cospi[4], bf0[17], cos_bit[stage]);
+  bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
+  bf1[19] = half_btf(cospi[44], bf0[18], -cospi[20], bf0[19], cos_bit[stage]);
+  bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
+  bf1[21] = half_btf(cospi[28], bf0[20], -cospi[36], bf0[21], cos_bit[stage]);
+  bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
+  bf1[23] = half_btf(cospi[12], bf0[22], -cospi[52], bf0[23], cos_bit[stage]);
+  bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[4], bf0[24], cospi[60], bf0[25], cos_bit[stage]);
+  bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[20], bf0[26], cospi[44], bf0[27], cos_bit[stage]);
+  bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[36], bf0[28], cospi[28], bf0[29], cos_bit[stage]);
+  bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[52], bf0[30], cospi[12], bf0[31], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 9
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[16];
+  bf1[1] = bf0[1] + bf0[17];
+  bf1[2] = bf0[2] + bf0[18];
+  bf1[3] = bf0[3] + bf0[19];
+  bf1[4] = bf0[4] + bf0[20];
+  bf1[5] = bf0[5] + bf0[21];
+  bf1[6] = bf0[6] + bf0[22];
+  bf1[7] = bf0[7] + bf0[23];
+  bf1[8] = bf0[8] + bf0[24];
+  bf1[9] = bf0[9] + bf0[25];
+  bf1[10] = bf0[10] + bf0[26];
+  bf1[11] = bf0[11] + bf0[27];
+  bf1[12] = bf0[12] + bf0[28];
+  bf1[13] = bf0[13] + bf0[29];
+  bf1[14] = bf0[14] + bf0[30];
+  bf1[15] = bf0[15] + bf0[31];
+  bf1[16] = bf0[0] - bf0[16];
+  bf1[17] = bf0[1] - bf0[17];
+  bf1[18] = bf0[2] - bf0[18];
+  bf1[19] = bf0[3] - bf0[19];
+  bf1[20] = bf0[4] - bf0[20];
+  bf1[21] = bf0[5] - bf0[21];
+  bf1[22] = bf0[6] - bf0[22];
+  bf1[23] = bf0[7] - bf0[23];
+  bf1[24] = bf0[8] - bf0[24];
+  bf1[25] = bf0[9] - bf0[25];
+  bf1[26] = bf0[10] - bf0[26];
+  bf1[27] = bf0[11] - bf0[27];
+  bf1[28] = bf0[12] - bf0[28];
+  bf1[29] = bf0[13] - bf0[29];
+  bf1[30] = bf0[14] - bf0[30];
+  bf1[31] = bf0[15] - bf0[31];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 10
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(cospi[63], bf0[0], -cospi[1], bf0[1], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[59], bf0[2], -cospi[5], bf0[3], cos_bit[stage]);
+  bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[55], bf0[4], -cospi[9], bf0[5], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[51], bf0[6], -cospi[13], bf0[7], cos_bit[stage]);
+  bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
+  bf1[9] = half_btf(cospi[47], bf0[8], -cospi[17], bf0[9], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[43], bf0[10], -cospi[21], bf0[11], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[39], bf0[12], -cospi[25], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[35], bf0[14], -cospi[29], bf0[15], cos_bit[stage]);
+  bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
+  bf1[17] = half_btf(cospi[31], bf0[16], -cospi[33], bf0[17], cos_bit[stage]);
+  bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
+  bf1[19] = half_btf(cospi[27], bf0[18], -cospi[37], bf0[19], cos_bit[stage]);
+  bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
+  bf1[21] = half_btf(cospi[23], bf0[20], -cospi[41], bf0[21], cos_bit[stage]);
+  bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
+  bf1[23] = half_btf(cospi[19], bf0[22], -cospi[45], bf0[23], cos_bit[stage]);
+  bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[15], bf0[24], -cospi[49], bf0[25], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[11], bf0[26], -cospi[53], bf0[27], cos_bit[stage]);
+  bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[7], bf0[28], -cospi[57], bf0[29], cos_bit[stage]);
+  bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[3], bf0[30], -cospi[61], bf0[31], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 11
+  stage++;
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[1];
+  bf1[1] = bf0[30];
+  bf1[2] = bf0[3];
+  bf1[3] = bf0[28];
+  bf1[4] = bf0[5];
+  bf1[5] = bf0[26];
+  bf1[6] = bf0[7];
+  bf1[7] = bf0[24];
+  bf1[8] = bf0[9];
+  bf1[9] = bf0[22];
+  bf1[10] = bf0[11];
+  bf1[11] = bf0[20];
+  bf1[12] = bf0[13];
+  bf1[13] = bf0[18];
+  bf1[14] = bf0[15];
+  bf1[15] = bf0[16];
+  bf1[16] = bf0[17];
+  bf1[17] = bf0[14];
+  bf1[18] = bf0[19];
+  bf1[19] = bf0[12];
+  bf1[20] = bf0[21];
+  bf1[21] = bf0[10];
+  bf1[22] = bf0[23];
+  bf1[23] = bf0[8];
+  bf1[24] = bf0[25];
+  bf1[25] = bf0[6];
+  bf1[26] = bf0[27];
+  bf1[27] = bf0[4];
+  bf1[28] = bf0[29];
+  bf1[29] = bf0[2];
+  bf1[30] = bf0[31];
+  bf1[31] = bf0[0];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
diff --git a/av1/common/vp10_inv_txfm1d.h b/av1/common/vp10_inv_txfm1d.h
new file mode 100644
index 0000000..21b80bf
--- /dev/null
+++ b/av1/common/vp10_inv_txfm1d.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM1D_H_
+#define VP10_INV_TXFM1D_H_
+
+#include "av1/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_idct4_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct8_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct16_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct32_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct64_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_iadst4_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst8_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst16_new(const int32_t *input, int32_t *output,
+                      const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst32_new(const int32_t *input, int32_t *output,
+                      const int8_t *cos_bit, const int8_t *stage_range);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP10_INV_TXFM1D_H_
diff --git a/av1/common/vp10_inv_txfm2d.c b/av1/common/vp10_inv_txfm2d.c
new file mode 100644
index 0000000..60606c9
--- /dev/null
+++ b/av1/common/vp10_inv_txfm2d.c
@@ -0,0 +1,191 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "av1/common/enums.h"
+#include "av1/common/vp10_txfm.h"
+#include "av1/common/vp10_inv_txfm1d.h"
+#include "av1/common/vp10_inv_txfm2d_cfg.h"
+
+static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
+  switch (txfm_type) {
+    case TXFM_TYPE_DCT4: return vp10_idct4_new;
+    case TXFM_TYPE_DCT8: return vp10_idct8_new;
+    case TXFM_TYPE_DCT16: return vp10_idct16_new;
+    case TXFM_TYPE_DCT32: return vp10_idct32_new;
+    case TXFM_TYPE_ADST4: return vp10_iadst4_new;
+    case TXFM_TYPE_ADST8: return vp10_iadst8_new;
+    case TXFM_TYPE_ADST16: return vp10_iadst16_new;
+    case TXFM_TYPE_ADST32: return vp10_iadst32_new;
+    default: assert(0); return NULL;
+  }
+}
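+
+// TxfmFunc (from vp10_txfm.h) is assumed to be
+//
+//   typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
+//                            const int8_t *cos_bit,
+//                            const int8_t *stage_range);
+//
+// i.e. exactly the signature of the 1-D routines declared in
+// vp10_inv_txfm1d.h, so the switch above can dispatch on TXFM_TYPE alone.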
+
+#if CONFIG_EXT_TX
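+// Rows are indexed by tx_type, columns by tx_size. The FLIPADST_* rows
+// deliberately reuse the corresponding plain ADST configurations; the
+// flipping itself is applied in inv_txfm2d_add_c() via the
+// ud_flip/lr_flip fields that set_flip_cfg() fills in.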
+static const TXFM_2D_CFG *inv_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = {
+  { &inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8,
+    &inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 },
+  { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
+    &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
+  { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
+    &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
+  { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+    &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+  { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
+    &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
+  { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
+    &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
+  { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+    &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+  { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+    &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+  { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+    &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+};
+#else
+static const TXFM_2D_CFG *inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
+  { &inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8,
+    &inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 },
+  { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
+    &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
+  { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
+    &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
+  { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+    &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+};
+#endif
+
+TXFM_2D_FLIP_CFG vp10_get_inv_txfm_cfg(int tx_type, int tx_size) {
+  TXFM_2D_FLIP_CFG cfg;
+  set_flip_cfg(tx_type, &cfg);
+  cfg.cfg = inv_txfm_cfg_ls[tx_type][tx_size];
+  return cfg;
+}
+
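+// Only DCT_DCT is available at 64x64; any other tx_type trips the assert
+// below.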
+TXFM_2D_FLIP_CFG vp10_get_inv_txfm_64x64_cfg(int tx_type) {
+  TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL };
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg.cfg = &inv_txfm_2d_cfg_dct_dct_64;
+      set_flip_cfg(tx_type, &cfg);
+      break;
+    default: assert(0);
+  }
+  return cfg;
+}
+
+static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
+                                    int stride, TXFM_2D_FLIP_CFG *cfg,
+                                    int32_t *txfm_buf) {
+  const int txfm_size = cfg->cfg->txfm_size;
+  const int8_t *shift = cfg->cfg->shift;
+  const int8_t *stage_range_col = cfg->cfg->stage_range_col;
+  const int8_t *stage_range_row = cfg->cfg->stage_range_row;
+  const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;
+  const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;
+  const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->cfg->txfm_type_col);
+  const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->cfg->txfm_type_row);
+
+  // txfm_buf has length txfm_size * txfm_size + 2 * txfm_size and is used
+  // for intermediate data buffering.
+  int32_t *temp_in = txfm_buf;
+  int32_t *temp_out = temp_in + txfm_size;
+  int32_t *buf = temp_out + txfm_size;
+  int32_t *buf_ptr = buf;
+  int c, r;
+
+  // Rows
+  for (r = 0; r < txfm_size; ++r) {
+    txfm_func_row(input, buf_ptr, cos_bit_row, stage_range_row);
+    round_shift_array(buf_ptr, txfm_size, -shift[0]);
+    input += txfm_size;
+    buf_ptr += txfm_size;
+  }
+
+  // Columns
+  for (c = 0; c < txfm_size; ++c) {
+    if (cfg->lr_flip == 0) {
+      for (r = 0; r < txfm_size; ++r) temp_in[r] = buf[r * txfm_size + c];
+    } else {
+      // flip left right
+      for (r = 0; r < txfm_size; ++r)
+        temp_in[r] = buf[r * txfm_size + (txfm_size - c - 1)];
+    }
+    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
+    round_shift_array(temp_out, txfm_size, -shift[1]);
+    if (cfg->ud_flip == 0) {
+      for (r = 0; r < txfm_size; ++r) output[r * stride + c] += temp_out[r];
+    } else {
+      // flip upside down
+      for (r = 0; r < txfm_size; ++r)
+        output[r * stride + c] += temp_out[txfm_size - r - 1];
+    }
+  }
+}
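+// Illustrative txfm_buf layout for the 4x4 case (txfm_size == 4):
+//   txfm_buf[0..3]    temp_in  - one column gathered from buf
+//   txfm_buf[4..7]    temp_out - output of the 1D column transform
+//   txfm_buf[8..23]   buf      - 4x4 intermediate produced by the row pass
+// This is why the wrappers below size the buffer as N * N + N + N.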
+
+void vp10_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
+                               int stride, int tx_type, int bd) {
+  int32_t txfm_buf[4 * 4 + 4 + 4];
+  // output contains the prediction signal, which is always positive and
+  // smaller than (1 << bd) - 1. Since bd < 16 - 1, we can treat the uint16_t *
+  // output buffer as an int16_t *.
+  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_4X4);
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
+  clamp_block((int16_t *)output, 4, stride, 0, (1 << bd) - 1);
+}
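+// Hypothetical usage sketch (variable names are illustrative, not from this
+// codebase): given dequantized coefficients and a high-bitdepth prediction
+// block, reconstruct in place with a 10-bit clamp:
+//   int32_t coeffs[4 * 4];  // dequantized transform coefficients
+//   uint16_t *pred;         // prediction/destination pixels
+//   vp10_inv_txfm2d_add_4x4_c(coeffs, pred, pred_stride, DCT_DCT, 10);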
+
+void vp10_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
+                               int stride, int tx_type, int bd) {
+  int32_t txfm_buf[8 * 8 + 8 + 8];
+  // output contains the prediction signal, which is always positive and
+  // smaller than (1 << bd) - 1. Since bd < 16 - 1, we can treat the uint16_t *
+  // output buffer as an int16_t *.
+  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_8X8);
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
+  clamp_block((int16_t *)output, 8, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
+                                 int stride, int tx_type, int bd) {
+  int32_t txfm_buf[16 * 16 + 16 + 16];
+  // output contains the prediction signal, which is always positive and
+  // smaller than (1 << bd) - 1. Since bd < 16 - 1, we can treat the uint16_t *
+  // output buffer as an int16_t *.
+  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_16X16);
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
+  clamp_block((int16_t *)output, 16, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
+                                 int stride, int tx_type, int bd) {
+  int32_t txfm_buf[32 * 32 + 32 + 32];
+  // output contains the prediction signal, which is always positive and
+  // smaller than (1 << bd) - 1. Since bd < 16 - 1, we can treat the uint16_t *
+  // output buffer as an int16_t *.
+  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_32X32);
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
+  clamp_block((int16_t *)output, 32, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
+                                 int stride, int tx_type, int bd) {
+  int32_t txfm_buf[64 * 64 + 64 + 64];
+  // output contains the prediction signal, which is always positive and
+  // smaller than (1 << bd) - 1. Since bd < 16 - 1, we can treat the uint16_t *
+  // output buffer as an int16_t *.
+  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_64x64_cfg(tx_type);
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
+  clamp_block((int16_t *)output, 64, stride, 0, (1 << bd) - 1);
+}
diff --git a/av1/common/vp10_inv_txfm2d_cfg.h b/av1/common/vp10_inv_txfm2d_cfg.h
new file mode 100644
index 0000000..9bfa420
--- /dev/null
+++ b/av1/common/vp10_inv_txfm2d_cfg.h
@@ -0,0 +1,444 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM2D_CFG_H_
+#define VP10_INV_TXFM2D_CFG_H_
+#include "av1/common/vp10_inv_txfm1d.h"
+//  ---------------- config inv_dct_dct_4 ----------------
+static const int8_t inv_shift_dct_dct_4[2] = { 0, -4 };
+static const int8_t inv_stage_range_col_dct_dct_4[4] = { 18, 18, 17, 17 };
+static const int8_t inv_stage_range_row_dct_dct_4[4] = { 18, 18, 18, 18 };
+static const int8_t inv_cos_bit_col_dct_dct_4[4] = { 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_dct_4[4] = { 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_4 = {
+  4,  // .txfm_size
+  4,  // .stage_num_col
+  4,  // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_dct_dct_4,            // .shift
+  inv_stage_range_col_dct_dct_4,  // .stage_range_col
+  inv_stage_range_row_dct_dct_4,  // .stage_range_row
+  inv_cos_bit_col_dct_dct_4,      // .cos_bit_col
+  inv_cos_bit_row_dct_dct_4,      // .cos_bit_row
+  TXFM_TYPE_DCT4,                 // .txfm_type_col
+  TXFM_TYPE_DCT4
+};  // .txfm_type_row
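+// In every config below, shift[0] is applied after the row pass and shift[1]
+// after the column pass; inv_txfm2d_add_c() negates them when calling
+// round_shift_array(), so the negative entries act as rounding right-shifts.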
+
+//  ---------------- config inv_dct_dct_8 ----------------
+static const int8_t inv_shift_dct_dct_8[2] = { 0, -5 };
+static const int8_t inv_stage_range_col_dct_dct_8[6] = {
+  19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_dct_dct_8[6] = {
+  19, 19, 19, 19, 19, 19
+};
+static const int8_t inv_cos_bit_col_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_8 = {
+  8,  // .txfm_size
+  6,  // .stage_num_col
+  6,  // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_dct_dct_8,            // .shift
+  inv_stage_range_col_dct_dct_8,  // .stage_range_col
+  inv_stage_range_row_dct_dct_8,  // .stage_range_row
+  inv_cos_bit_col_dct_dct_8,      // .cos_bit_col
+  inv_cos_bit_row_dct_dct_8,      // .cos_bit_row
+  TXFM_TYPE_DCT8,                 // .txfm_type_col
+  TXFM_TYPE_DCT8
+};  // .txfm_type_row
+
+//  ---------------- config inv_dct_dct_16 ----------------
+static const int8_t inv_shift_dct_dct_16[2] = { -1, -5 };
+static const int8_t inv_stage_range_col_dct_dct_16[8] = { 19, 19, 19, 19,
+                                                          19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_dct_dct_16[8] = { 20, 20, 20, 20,
+                                                          20, 20, 20, 20 };
+static const int8_t inv_cos_bit_col_dct_dct_16[8] = { 13, 13, 13, 13,
+                                                      13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_dct_16[8] = { 12, 12, 12, 12,
+                                                      12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_16 = {
+  16,  // .txfm_size
+  8,   // .stage_num_col
+  8,   // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_dct_dct_16,            // .shift
+  inv_stage_range_col_dct_dct_16,  // .stage_range_col
+  inv_stage_range_row_dct_dct_16,  // .stage_range_row
+  inv_cos_bit_col_dct_dct_16,      // .cos_bit_col
+  inv_cos_bit_row_dct_dct_16,      // .cos_bit_row
+  TXFM_TYPE_DCT16,                 // .txfm_type_col
+  TXFM_TYPE_DCT16
+};  // .txfm_type_row
+
+//  ---------------- config inv_dct_dct_32 ----------------
+static const int8_t inv_shift_dct_dct_32[2] = { -1, -5 };
+static const int8_t inv_stage_range_col_dct_dct_32[10] = { 19, 19, 19, 19, 19,
+                                                           19, 19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_dct_dct_32[10] = { 20, 20, 20, 20, 20,
+                                                           20, 20, 20, 20, 20 };
+static const int8_t inv_cos_bit_col_dct_dct_32[10] = { 13, 13, 13, 13, 13,
+                                                       13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_dct_32[10] = { 12, 12, 12, 12, 12,
+                                                       12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_32 = {
+  32,  // .txfm_size
+  10,  // .stage_num_col
+  10,  // .stage_num_row
+  // 1,  // .log_scale
+  inv_shift_dct_dct_32,            // .shift
+  inv_stage_range_col_dct_dct_32,  // .stage_range_col
+  inv_stage_range_row_dct_dct_32,  // .stage_range_row
+  inv_cos_bit_col_dct_dct_32,      // .cos_bit_col
+  inv_cos_bit_row_dct_dct_32,      // .cos_bit_row
+  TXFM_TYPE_DCT32,                 // .txfm_type_col
+  TXFM_TYPE_DCT32
+};  // .txfm_type_row
+
+//  ---------------- config inv_dct_dct_64 ----------------
+static const int8_t inv_shift_dct_dct_64[2] = { -1, -7 };
+static const int8_t inv_stage_range_col_dct_dct_64[12] = {
+  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_dct_dct_64[12] = {
+  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_dct_dct_64[12] = { 13, 13, 13, 13, 13, 13,
+                                                       13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_dct_64[12] = { 12, 12, 12, 12, 12, 12,
+                                                       12, 12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_64 = {
+  64,                              // .txfm_size
+  12,                              // .stage_num_col
+  12,                              // .stage_num_row
+  inv_shift_dct_dct_64,            // .shift
+  inv_stage_range_col_dct_dct_64,  // .stage_range_col
+  inv_stage_range_row_dct_dct_64,  // .stage_range_row
+  inv_cos_bit_col_dct_dct_64,      // .cos_bit_col
+  inv_cos_bit_row_dct_dct_64,      // .cos_bit_row
+  TXFM_TYPE_DCT64,                 // .txfm_type_col
+  TXFM_TYPE_DCT64
+};  // .txfm_type_row
+
+//  ---------------- config inv_dct_adst_4 ----------------
+static const int8_t inv_shift_dct_adst_4[2] = { 0, -4 };
+static const int8_t inv_stage_range_col_dct_adst_4[4] = { 18, 18, 17, 17 };
+static const int8_t inv_stage_range_row_dct_adst_4[6] = {
+  18, 18, 18, 18, 18, 18
+};
+static const int8_t inv_cos_bit_col_dct_adst_4[4] = { 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_4 = {
+  4,  // .txfm_size
+  4,  // .stage_num_col
+  6,  // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_dct_adst_4,            // .shift
+  inv_stage_range_col_dct_adst_4,  // .stage_range_col
+  inv_stage_range_row_dct_adst_4,  // .stage_range_row
+  inv_cos_bit_col_dct_adst_4,      // .cos_bit_col
+  inv_cos_bit_row_dct_adst_4,      // .cos_bit_row
+  TXFM_TYPE_DCT4,                  // .txfm_type_col
+  TXFM_TYPE_ADST4
+};  // .txfm_type_row
+
+//  ---------------- config inv_dct_adst_8 ----------------
+static const int8_t inv_shift_dct_adst_8[2] = { 0, -5 };
+static const int8_t inv_stage_range_col_dct_adst_8[6] = {
+  19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_dct_adst_8[8] = { 19, 19, 19, 19,
+                                                          19, 19, 19, 19 };
+static const int8_t inv_cos_bit_col_dct_adst_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_adst_8[8] = { 13, 13, 13, 13,
+                                                      13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_8 = {
+  8,  // .txfm_size
+  6,  // .stage_num_col
+  8,  // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_dct_adst_8,            // .shift
+  inv_stage_range_col_dct_adst_8,  // .stage_range_col
+  inv_stage_range_row_dct_adst_8,  // .stage_range_row
+  inv_cos_bit_col_dct_adst_8,      // .cos_bit_col
+  inv_cos_bit_row_dct_adst_8,      // .cos_bit_row
+  TXFM_TYPE_DCT8,                  // .txfm_type_col
+  TXFM_TYPE_ADST8
+};  // .txfm_type_row
+
+//  ---------------- config inv_dct_adst_16 ----------------
+static const int8_t inv_shift_dct_adst_16[2] = { -1, -5 };
+static const int8_t inv_stage_range_col_dct_adst_16[8] = { 19, 19, 19, 19,
+                                                           19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_dct_adst_16[10] = {
+  20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_dct_adst_16[8] = { 13, 13, 13, 13,
+                                                       13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_adst_16[10] = { 12, 12, 12, 12, 12,
+                                                        12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_16 = {
+  16,  // .txfm_size
+  8,   // .stage_num_col
+  10,  // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_dct_adst_16,            // .shift
+  inv_stage_range_col_dct_adst_16,  // .stage_range_col
+  inv_stage_range_row_dct_adst_16,  // .stage_range_row
+  inv_cos_bit_col_dct_adst_16,      // .cos_bit_col
+  inv_cos_bit_row_dct_adst_16,      // .cos_bit_row
+  TXFM_TYPE_DCT16,                  // .txfm_type_col
+  TXFM_TYPE_ADST16
+};  // .txfm_type_row
+
+//  ---------------- config inv_dct_adst_32 ----------------
+static const int8_t inv_shift_dct_adst_32[2] = { -1, -5 };
+static const int8_t inv_stage_range_col_dct_adst_32[10] = {
+  19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_dct_adst_32[12] = {
+  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_dct_adst_32[10] = { 13, 13, 13, 13, 13,
+                                                        13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_adst_32[12] = {
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_32 = {
+  32,  // .txfm_size
+  10,  // .stage_num_col
+  12,  // .stage_num_row
+  // 1,  // .log_scale
+  inv_shift_dct_adst_32,            // .shift
+  inv_stage_range_col_dct_adst_32,  // .stage_range_col
+  inv_stage_range_row_dct_adst_32,  // .stage_range_row
+  inv_cos_bit_col_dct_adst_32,      // .cos_bit_col
+  inv_cos_bit_row_dct_adst_32,      // .cos_bit_row
+  TXFM_TYPE_DCT32,                  // .txfm_type_col
+  TXFM_TYPE_ADST32
+};  // .txfm_type_row
+
+//  ---------------- config inv_adst_adst_4 ----------------
+static const int8_t inv_shift_adst_adst_4[2] = { 0, -4 };
+static const int8_t inv_stage_range_col_adst_adst_4[6] = { 18, 18, 18,
+                                                           18, 17, 17 };
+static const int8_t inv_stage_range_row_adst_adst_4[6] = { 18, 18, 18,
+                                                           18, 18, 18 };
+static const int8_t inv_cos_bit_col_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_4 = {
+  4,  // .txfm_size
+  6,  // .stage_num_col
+  6,  // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_adst_adst_4,            // .shift
+  inv_stage_range_col_adst_adst_4,  // .stage_range_col
+  inv_stage_range_row_adst_adst_4,  // .stage_range_row
+  inv_cos_bit_col_adst_adst_4,      // .cos_bit_col
+  inv_cos_bit_row_adst_adst_4,      // .cos_bit_row
+  TXFM_TYPE_ADST4,                  // .txfm_type_col
+  TXFM_TYPE_ADST4
+};  // .txfm_type_row
+
+//  ---------------- config inv_adst_adst_8 ----------------
+static const int8_t inv_shift_adst_adst_8[2] = { 0, -5 };
+static const int8_t inv_stage_range_col_adst_adst_8[8] = { 19, 19, 19, 19,
+                                                           19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_adst_adst_8[8] = { 19, 19, 19, 19,
+                                                           19, 19, 19, 19 };
+static const int8_t inv_cos_bit_col_adst_adst_8[8] = { 13, 13, 13, 13,
+                                                       13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_adst_8[8] = { 13, 13, 13, 13,
+                                                       13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_8 = {
+  8,  // .txfm_size
+  8,  // .stage_num_col
+  8,  // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_adst_adst_8,            // .shift
+  inv_stage_range_col_adst_adst_8,  // .stage_range_col
+  inv_stage_range_row_adst_adst_8,  // .stage_range_row
+  inv_cos_bit_col_adst_adst_8,      // .cos_bit_col
+  inv_cos_bit_row_adst_adst_8,      // .cos_bit_row
+  TXFM_TYPE_ADST8,                  // .txfm_type_col
+  TXFM_TYPE_ADST8
+};  // .txfm_type_row
+
+//  ---------------- config inv_adst_adst_16 ----------------
+static const int8_t inv_shift_adst_adst_16[2] = { -1, -5 };
+static const int8_t inv_stage_range_col_adst_adst_16[10] = {
+  19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_adst_adst_16[10] = {
+  20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_adst_adst_16[10] = { 13, 13, 13, 13, 13,
+                                                         13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_adst_16[10] = { 12, 12, 12, 12, 12,
+                                                         12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_16 = {
+  16,  // .txfm_size
+  10,  // .stage_num_col
+  10,  // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_adst_adst_16,            // .shift
+  inv_stage_range_col_adst_adst_16,  // .stage_range_col
+  inv_stage_range_row_adst_adst_16,  // .stage_range_row
+  inv_cos_bit_col_adst_adst_16,      // .cos_bit_col
+  inv_cos_bit_row_adst_adst_16,      // .cos_bit_row
+  TXFM_TYPE_ADST16,                  // .txfm_type_col
+  TXFM_TYPE_ADST16
+};  // .txfm_type_row
+
+//  ---------------- config inv_adst_adst_32 ----------------
+static const int8_t inv_shift_adst_adst_32[2] = { -1, -5 };
+static const int8_t inv_stage_range_col_adst_adst_32[12] = {
+  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_adst_adst_32[12] = {
+  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_adst_adst_32[12] = {
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13
+};
+static const int8_t inv_cos_bit_row_adst_adst_32[12] = {
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_32 = {
+  32,  // .txfm_size
+  12,  // .stage_num_col
+  12,  // .stage_num_row
+  // 1,  // .log_scale
+  inv_shift_adst_adst_32,            // .shift
+  inv_stage_range_col_adst_adst_32,  // .stage_range_col
+  inv_stage_range_row_adst_adst_32,  // .stage_range_row
+  inv_cos_bit_col_adst_adst_32,      // .cos_bit_col
+  inv_cos_bit_row_adst_adst_32,      // .cos_bit_row
+  TXFM_TYPE_ADST32,                  // .txfm_type_col
+  TXFM_TYPE_ADST32
+};  // .txfm_type_row
+
+//  ---------------- config inv_adst_dct_4 ----------------
+static const int8_t inv_shift_adst_dct_4[2] = { 0, -4 };
+static const int8_t inv_stage_range_col_adst_dct_4[6] = {
+  18, 18, 18, 18, 17, 17
+};
+static const int8_t inv_stage_range_row_adst_dct_4[4] = { 18, 18, 18, 18 };
+static const int8_t inv_cos_bit_col_adst_dct_4[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_dct_4[4] = { 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_4 = {
+  4,  // .txfm_size
+  6,  // .stage_num_col
+  4,  // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_adst_dct_4,            // .shift
+  inv_stage_range_col_adst_dct_4,  // .stage_range_col
+  inv_stage_range_row_adst_dct_4,  // .stage_range_row
+  inv_cos_bit_col_adst_dct_4,      // .cos_bit_col
+  inv_cos_bit_row_adst_dct_4,      // .cos_bit_row
+  TXFM_TYPE_ADST4,                 // .txfm_type_col
+  TXFM_TYPE_DCT4
+};  // .txfm_type_row
+
+//  ---------------- config inv_adst_dct_8 ----------------
+static const int8_t inv_shift_adst_dct_8[2] = { 0, -5 };
+static const int8_t inv_stage_range_col_adst_dct_8[8] = { 19, 19, 19, 19,
+                                                          19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_adst_dct_8[6] = {
+  19, 19, 19, 19, 19, 19
+};
+static const int8_t inv_cos_bit_col_adst_dct_8[8] = { 13, 13, 13, 13,
+                                                      13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_8 = {
+  8,  // .txfm_size
+  8,  // .stage_num_col
+  6,  // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_adst_dct_8,            // .shift
+  inv_stage_range_col_adst_dct_8,  // .stage_range_col
+  inv_stage_range_row_adst_dct_8,  // .stage_range_row
+  inv_cos_bit_col_adst_dct_8,      // .cos_bit_col
+  inv_cos_bit_row_adst_dct_8,      // .cos_bit_row
+  TXFM_TYPE_ADST8,                 // .txfm_type_col
+  TXFM_TYPE_DCT8
+};  // .txfm_type_row
+
+//  ---------------- config inv_adst_dct_16 ----------------
+static const int8_t inv_shift_adst_dct_16[2] = { -1, -5 };
+static const int8_t inv_stage_range_col_adst_dct_16[10] = {
+  19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_adst_dct_16[8] = { 20, 20, 20, 20,
+                                                           20, 20, 20, 20 };
+static const int8_t inv_cos_bit_col_adst_dct_16[10] = { 13, 13, 13, 13, 13,
+                                                        13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_dct_16[8] = { 12, 12, 12, 12,
+                                                       12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_16 = {
+  16,  // .txfm_size
+  10,  // .stage_num_col
+  8,   // .stage_num_row
+  // 0,  // .log_scale
+  inv_shift_adst_dct_16,            // .shift
+  inv_stage_range_col_adst_dct_16,  // .stage_range_col
+  inv_stage_range_row_adst_dct_16,  // .stage_range_row
+  inv_cos_bit_col_adst_dct_16,      // .cos_bit_col
+  inv_cos_bit_row_adst_dct_16,      // .cos_bit_row
+  TXFM_TYPE_ADST16,                 // .txfm_type_col
+  TXFM_TYPE_DCT16
+};  // .txfm_type_row
+
+//  ---------------- config inv_adst_dct_32 ----------------
+static const int8_t inv_shift_adst_dct_32[2] = { -1, -5 };
+static const int8_t inv_stage_range_col_adst_dct_32[12] = {
+  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_adst_dct_32[10] = {
+  20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_adst_dct_32[12] = {
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13
+};
+static const int8_t inv_cos_bit_row_adst_dct_32[10] = { 12, 12, 12, 12, 12,
+                                                        12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_32 = {
+  32,  // .txfm_size
+  12,  // .stage_num_col
+  10,  // .stage_num_row
+  // 1,  // .log_scale
+  inv_shift_adst_dct_32,            // .shift
+  inv_stage_range_col_adst_dct_32,  // .stage_range_col
+  inv_stage_range_row_adst_dct_32,  // .stage_range_row
+  inv_cos_bit_col_adst_dct_32,      // .cos_bit_col
+  inv_cos_bit_row_adst_dct_32,      // .cos_bit_row
+  TXFM_TYPE_ADST32,                 // .txfm_type_col
+  TXFM_TYPE_DCT32
+};  // .txfm_type_row
+
+#endif  // VP10_INV_TXFM2D_CFG_H_
diff --git a/av1/common/vp10_rtcd.c b/av1/common/vp10_rtcd.c
new file mode 100644
index 0000000..7fce6b9
--- /dev/null
+++ b/av1/common/vp10_rtcd.c
@@ -0,0 +1,19 @@
+/*
+ *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#define RTCD_C
+#include "./vp10_rtcd.h"
+#include "aom_ports/vpx_once.h"
+
+void vp10_rtcd() {
+  // TODO(JBB): Remove this once() call by ensuring that both the encoder and
+  // decoder setup functions are protected by once();
+  once(setup_rtcd_internal);
+}
diff --git a/av1/common/vp10_rtcd_defs.pl b/av1/common/vp10_rtcd_defs.pl
new file mode 100644
index 0000000..4a16723
--- /dev/null
+++ b/av1/common/vp10_rtcd_defs.pl
@@ -0,0 +1,912 @@
+sub vp10_common_forward_decls() {
+print <<EOF
+/*
+ * VP10
+ */
+
+#include "aom/vpx_integer.h"
+#include "av1/common/common.h"
+#include "av1/common/enums.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/filter.h"
+#include "av1/common/vp10_txfm.h"
+
+struct macroblockd;
+
+/* Encoder forward decls */
+struct macroblock;
+struct vpx_variance_vtable;
+struct search_site_config;
+struct mv;
+union int_mv;
+struct yv12_buffer_config;
+EOF
+}
+forward_decls qw/vp10_common_forward_decls/;
+
+# functions that are 64 bit only.
+$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
+if ($opts{arch} eq "x86_64") {
+  $mmx_x86_64 = 'mmx';
+  $sse2_x86_64 = 'sse2';
+  $ssse3_x86_64 = 'ssse3';
+  $avx_x86_64 = 'avx';
+  $avx2_x86_64 = 'avx2';
+}
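+# Each add_proto below declares a prototype for the RTCD dispatch system, and
+# each specialize line lists the SIMD flavors with optimized implementations.
+# The generated setup_rtcd_internal() (invoked via once() in vp10_rtcd.c)
+# points every entry at the best variant the running CPU supports.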
+
+#
+# 10/12-tap convolution filters
+#
+add_proto qw/void vp10_convolve_horiz/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg";
+specialize qw/vp10_convolve_horiz ssse3/;
+
+add_proto qw/void vp10_convolve_vert/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg";
+specialize qw/vp10_convolve_vert ssse3/;
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+  add_proto qw/void vp10_highbd_convolve_horiz/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
+  specialize qw/vp10_highbd_convolve_horiz sse4_1/;
+  add_proto qw/void vp10_highbd_convolve_vert/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
+  specialize qw/vp10_highbd_convolve_vert sse4_1/;
+}
+
+#
+# dct
+#
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+  # Note: as optimized versions of these functions are added, we need to add a
+  # check to ensure that when CONFIG_EMULATE_HARDWARE is on, it defaults to the
+  # C versions only.
+  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+    add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht4x4_16_add/;
+
+    add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht4x8_32_add/;
+
+    add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x4_32_add/;
+
+    add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x16_128_add/;
+
+    add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht16x8_128_add/;
+
+    add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht16x32_512_add/;
+
+    add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht32x16_512_add/;
+
+    add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x8_64_add/;
+
+    add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+    specialize qw/vp10_iht16x16_256_add/;
+
+    add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct4x4/;
+
+    add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct4x4_1/;
+
+    add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct8x8/;
+
+    add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct8x8_1/;
+
+    add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct16x16/;
+
+    add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct16x16_1/;
+
+    add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32/;
+
+    add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32_rd/;
+
+    add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32_1/;
+
+    add_proto qw/void vp10_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct4x4/;
+
+    add_proto qw/void vp10_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct8x8/;
+
+    add_proto qw/void vp10_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct8x8_1/;
+
+    add_proto qw/void vp10_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct16x16/;
+
+    add_proto qw/void vp10_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct16x16_1/;
+
+    add_proto qw/void vp10_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct32x32/;
+
+    add_proto qw/void vp10_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct32x32_rd/;
+
+    add_proto qw/void vp10_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct32x32_1/;
+  } else {
+    add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht4x4_16_add sse2/;
+
+    add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht4x8_32_add/;
+
+    add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x4_32_add/;
+
+    add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x16_128_add/;
+
+    add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht16x8_128_add/;
+
+    add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht16x32_512_add/;
+
+    add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht32x16_512_add/;
+
+    add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x8_64_add sse2/;
+
+    add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+    specialize qw/vp10_iht16x16_256_add sse2/;
+
+    add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct4x4 sse2/;
+
+    add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct4x4_1 sse2/;
+
+    add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct8x8 sse2/;
+
+    add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct8x8_1 sse2/;
+
+    add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct16x16 sse2/;
+
+    add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct16x16_1 sse2/;
+
+    add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32 sse2/;
+
+    add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32_rd sse2/;
+
+    add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32_1 sse2/;
+
+    add_proto qw/void vp10_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct4x4 sse2/;
+
+    add_proto qw/void vp10_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct8x8 sse2/;
+
+    add_proto qw/void vp10_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct8x8_1/;
+
+    add_proto qw/void vp10_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct16x16 sse2/;
+
+    add_proto qw/void vp10_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct16x16_1/;
+
+    add_proto qw/void vp10_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct32x32 sse2/;
+
+    add_proto qw/void vp10_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct32x32_rd sse2/;
+
+    add_proto qw/void vp10_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_highbd_fdct32x32_1/;
+  }
+} else {
+  # Force C versions if CONFIG_EMULATE_HARDWARE is 1
+  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+    add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht4x4_16_add/;
+
+    add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht4x8_32_add/;
+
+    add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x4_32_add/;
+
+    add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x16_128_add/;
+
+    add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht16x8_128_add/;
+
+    add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht16x32_512_add/;
+
+    add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht32x16_512_add/;
+
+    add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x8_64_add/;
+
+    add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+    specialize qw/vp10_iht16x16_256_add/;
+
+    add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct4x4/;
+
+    add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct4x4_1/;
+
+    add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct8x8/;
+
+    add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct8x8_1/;
+
+    add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct16x16/;
+
+    add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct16x16_1/;
+
+    add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32/;
+
+    add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32_rd/;
+
+    add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32_1/;
+  } else {
+    add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht4x4_16_add sse2 neon dspr2/;
+
+    add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht4x8_32_add/;
+
+    add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x4_32_add/;
+
+    add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x16_128_add/;
+
+    add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht16x8_128_add/;
+
+    add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht16x32_512_add/;
+
+    add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht32x16_512_add/;
+
+    add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+    specialize qw/vp10_iht8x8_64_add sse2 neon dspr2/;
+
+    add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+    specialize qw/vp10_iht16x16_256_add sse2 dspr2/;
+
+    if (vpx_config("CONFIG_EXT_TX") ne "yes") {
+      specialize qw/vp10_iht4x4_16_add msa/;
+      specialize qw/vp10_iht8x8_64_add msa/;
+      specialize qw/vp10_iht16x16_256_add msa/;
+    }
+
+    add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct4x4 sse2/;
+
+    add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct4x4_1 sse2/;
+
+    add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct8x8 sse2/;
+
+    add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct8x8_1 sse2/;
+
+    add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct16x16 sse2/;
+
+    add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct16x16_1 sse2/;
+
+    add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32 sse2/;
+
+    add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32_rd sse2/;
+
+    add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/vp10_fdct32x32_1 sse2/;
+  }
+}
+
+if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+  add_proto qw/void quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/quantize_nuq/;
+
+  add_proto qw/void quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/quantize_fp_nuq/;
+
+  add_proto qw/void quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/quantize_32x32_nuq/;
+
+  add_proto qw/void quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/quantize_32x32_fp_nuq/;
+}
+
+# EXT_INTRA predictor functions
+if (vpx_config("CONFIG_EXT_INTRA") eq "yes") {
+  add_proto qw/void vp10_dc_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+  specialize qw/vp10_dc_filter_predictor sse4_1/;
+  add_proto qw/void vp10_v_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+  specialize qw/vp10_v_filter_predictor sse4_1/;
+  add_proto qw/void vp10_h_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+  specialize qw/vp10_h_filter_predictor sse4_1/;
+  add_proto qw/void vp10_d45_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+  specialize qw/vp10_d45_filter_predictor sse4_1/;
+  add_proto qw/void vp10_d135_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+  specialize qw/vp10_d135_filter_predictor sse4_1/;
+  add_proto qw/void vp10_d117_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+  specialize qw/vp10_d117_filter_predictor sse4_1/;
+  add_proto qw/void vp10_d153_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+  specialize qw/vp10_d153_filter_predictor sse4_1/;
+  add_proto qw/void vp10_d207_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+  specialize qw/vp10_d207_filter_predictor sse4_1/;
+  add_proto qw/void vp10_d63_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+  specialize qw/vp10_d63_filter_predictor sse4_1/;
+  add_proto qw/void vp10_tm_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+  specialize qw/vp10_tm_filter_predictor sse4_1/;
+  # High bitdepth functions
+  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+    add_proto qw/void vp10_highbd_dc_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+    specialize qw/vp10_highbd_dc_filter_predictor sse4_1/;
+    add_proto qw/void vp10_highbd_v_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+    specialize qw/vp10_highbd_v_filter_predictor sse4_1/;
+    add_proto qw/void vp10_highbd_h_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+    specialize qw/vp10_highbd_h_filter_predictor sse4_1/;
+    add_proto qw/void vp10_highbd_d45_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+    specialize qw/vp10_highbd_d45_filter_predictor sse4_1/;
+    add_proto qw/void vp10_highbd_d135_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+    specialize qw/vp10_highbd_d135_filter_predictor sse4_1/;
+    add_proto qw/void vp10_highbd_d117_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+    specialize qw/vp10_highbd_d117_filter_predictor sse4_1/;
+    add_proto qw/void vp10_highbd_d153_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+    specialize qw/vp10_highbd_d153_filter_predictor sse4_1/;
+    add_proto qw/void vp10_highbd_d207_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+    specialize qw/vp10_highbd_d207_filter_predictor sse4_1/;
+    add_proto qw/void vp10_highbd_d63_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+    specialize qw/vp10_highbd_d63_filter_predictor sse4_1/;
+    add_proto qw/void vp10_highbd_tm_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+    specialize qw/vp10_highbd_tm_filter_predictor sse4_1/;
+  }
+}
+
+# High bitdepth functions
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+  #
+  # Sub Pixel Filters
+  #
+  add_proto qw/void vp10_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vp10_highbd_convolve_copy/;
+
+  add_proto qw/void vp10_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vp10_highbd_convolve_avg/;
+
+  add_proto qw/void vp10_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vp10_highbd_convolve8/, "$sse2_x86_64";
+
+  add_proto qw/void vp10_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vp10_highbd_convolve8_horiz/, "$sse2_x86_64";
+
+  add_proto qw/void vp10_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vp10_highbd_convolve8_vert/, "$sse2_x86_64";
+
+  add_proto qw/void vp10_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vp10_highbd_convolve8_avg/, "$sse2_x86_64";
+
+  add_proto qw/void vp10_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vp10_highbd_convolve8_avg_horiz/, "$sse2_x86_64";
+
+  add_proto qw/void vp10_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vp10_highbd_convolve8_avg_vert/, "$sse2_x86_64";
+
+  #
+  # dct
+  #
+  # Note: as optimized versions of these functions are added, we need to add a
+  # check to ensure that when CONFIG_EMULATE_HARDWARE is on, it defaults to the
+  # C versions only.
+  add_proto qw/void vp10_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht4x4_16_add/;
+
+  add_proto qw/void vp10_highbd_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht4x8_32_add/;
+
+  add_proto qw/void vp10_highbd_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht8x4_32_add/;
+
+  add_proto qw/void vp10_highbd_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht8x16_128_add/;
+
+  add_proto qw/void vp10_highbd_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht16x8_128_add/;
+
+  add_proto qw/void vp10_highbd_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht16x32_512_add/;
+
+  add_proto qw/void vp10_highbd_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht32x16_512_add/;
+
+  add_proto qw/void vp10_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht8x8_64_add/;
+
+  add_proto qw/void vp10_highbd_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht16x16_256_add/;
+}
+
+#
+# Encoder functions below this point.
+#
+if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
+
+# ENCODEMB INVOKE
+
+if (vpx_config("CONFIG_AOM_QM") eq "yes") {
+  if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
+    # The transform coefficients are held in 32-bit values, so the assembler
+    # code for vp10_block_error can no longer be used.
+    add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+    specialize qw/vp10_block_error/;
+
+    add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+
+    add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+
+    add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+    specialize qw/vp10_fdct8x8_quant/;
+  } else {
+    add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+    specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
+
+    add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
+    specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
+
+    add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+
+    add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+
+    add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+  }
+} else {
+  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+    # The transform coefficients are held in 32-bit values, so the assembler
+    # code for vp10_block_error can no longer be used.
+    add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+    specialize qw/vp10_block_error/;
+
+    add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+    specialize qw/vp10_quantize_fp/;
+
+    add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+    specialize qw/vp10_quantize_fp_32x32/;
+
+    add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+    specialize qw/vp10_fdct8x8_quant/;
+  } else {
+    add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+    specialize qw/vp10_block_error sse2 avx2 msa/;
+
+    add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
+    specialize qw/vp10_block_error_fp neon sse2/;
+
+    add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+    specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64";
+
+    add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+    specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64";
+
+    add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+    specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
+  }
+
+}
+
+# fdct functions
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+  add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht4x4 sse2/;
+
+  add_proto qw/void vp10_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht4x8/;
+
+  add_proto qw/void vp10_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht8x4/;
+
+  add_proto qw/void vp10_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht8x16/;
+
+  add_proto qw/void vp10_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht16x8/;
+
+  add_proto qw/void vp10_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht16x32/;
+
+  add_proto qw/void vp10_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht32x16/;
+
+  add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht8x8 sse2/;
+
+  add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht16x16 sse2/;
+
+  add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht32x32/;
+
+  add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+  specialize qw/vp10_fwht4x4/;
+} else {
+  add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht4x4 sse2/;
+
+  add_proto qw/void vp10_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht4x8/;
+
+  add_proto qw/void vp10_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht8x4/;
+
+  add_proto qw/void vp10_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht8x16/;
+
+  add_proto qw/void vp10_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht16x8/;
+
+  add_proto qw/void vp10_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht16x32/;
+
+  add_proto qw/void vp10_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht32x16/;
+
+  add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht8x8 sse2/;
+
+  add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht16x16 sse2/;
+
+  if (vpx_config("CONFIG_EXT_TX") ne "yes") {
+    specialize qw/vp10_fht4x4 msa/;
+    specialize qw/vp10_fht8x8 msa/;
+    specialize qw/vp10_fht16x16 msa/;
+  }
+
+  add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht32x32/;
+
+  add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+  specialize qw/vp10_fwht4x4/;
+}
+
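+# vp10_fwd_idtx applies the forward identity transform (IDTX): the residual is
+# passed through (up to scaling) rather than projected onto a DCT/ADST basis.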
+add_proto qw/void vp10_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type";
+specialize qw/vp10_fwd_idtx/;
+
+# Inverse transform
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+  # Note: as optimized versions of these functions are added, we need to add a
+  # check to ensure that when CONFIG_EMULATE_HARDWARE is on, only the C
+  # versions are used.
+  add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_idct4x4_1_add/;
+
+  add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_idct4x4_16_add/;
+
+  add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_idct8x8_1_add/;
+
+  add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_idct8x8_64_add/;
+
+  add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_idct8x8_12_add/;
+
+  add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_idct16x16_1_add/;
+
+  add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_idct16x16_256_add/;
+
+  add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_idct16x16_10_add/;
+
+  add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_idct32x32_1024_add/;
+
+  add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_idct32x32_34_add/;
+
+  add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_idct32x32_1_add/;
+
+  add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_iwht4x4_1_add/;
+
+  add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+  specialize qw/vp10_iwht4x4_16_add/;
+
+  add_proto qw/void vp10_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+  specialize qw/vp10_highbd_idct4x4_1_add/;
+
+  add_proto qw/void vp10_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+  specialize qw/vp10_highbd_idct8x8_1_add/;
+
+  add_proto qw/void vp10_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+  specialize qw/vp10_highbd_idct16x16_1_add/;
+
+  add_proto qw/void vp10_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+  specialize qw/vp10_highbd_idct32x32_1024_add/;
+
+  add_proto qw/void vp10_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+  specialize qw/vp10_highbd_idct32x32_34_add/;
+
+  add_proto qw/void vp10_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+  specialize qw/vp10_highbd_idct32x32_1_add/;
+
+  add_proto qw/void vp10_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+  specialize qw/vp10_highbd_iwht4x4_1_add/;
+
+  add_proto qw/void vp10_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+  specialize qw/vp10_highbd_iwht4x4_16_add/;
+
+  # Force C versions if CONFIG_EMULATE_HARDWARE is 1
+  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+    add_proto qw/void vp10_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+    specialize qw/vp10_highbd_idct4x4_16_add/;
+
+    add_proto qw/void vp10_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+    specialize qw/vp10_highbd_idct8x8_64_add/;
+
+    add_proto qw/void vp10_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+    specialize qw/vp10_highbd_idct8x8_10_add/;
+
+    add_proto qw/void vp10_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+    specialize qw/vp10_highbd_idct16x16_256_add/;
+
+    add_proto qw/void vp10_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+    specialize qw/vp10_highbd_idct16x16_10_add/;
+  } else {
+    add_proto qw/void vp10_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+    specialize qw/vp10_highbd_idct4x4_16_add sse2/;
+
+    add_proto qw/void vp10_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+    specialize qw/vp10_highbd_idct8x8_64_add sse2/;
+
+    add_proto qw/void vp10_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+    specialize qw/vp10_highbd_idct8x8_10_add sse2/;
+
+    add_proto qw/void vp10_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+    specialize qw/vp10_highbd_idct16x16_256_add sse2/;
+
+    add_proto qw/void vp10_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+    specialize qw/vp10_highbd_idct16x16_10_add sse2/;
+  }  # CONFIG_EMULATE_HARDWARE
+} else {
+  # Force C versions if CONFIG_EMULATE_HARDWARE is 1
+  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+    add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct4x4_1_add/;
+
+    add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct4x4_16_add/;
+
+    add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct8x8_1_add/;
+
+    add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct8x8_64_add/;
+
+    add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct8x8_12_add/;
+
+    add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct16x16_1_add/;
+
+    add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct16x16_256_add/;
+
+    add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct16x16_10_add/;
+
+    add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct32x32_1024_add/;
+
+    add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct32x32_34_add/;
+
+    add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct32x32_1_add/;
+
+    add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_iwht4x4_1_add/;
+
+    add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_iwht4x4_16_add/;
+  } else {
+    add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct4x4_1_add sse2/;
+
+    add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct4x4_16_add sse2/;
+
+    add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct8x8_1_add sse2/;
+
+    add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct8x8_64_add sse2/;
+
+    add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct8x8_12_add sse2/;
+
+    add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct16x16_1_add sse2/;
+
+    add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct16x16_256_add sse2/;
+
+    add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct16x16_10_add sse2/;
+
+    add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct32x32_1024_add sse2/;
+
+    add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct32x32_34_add sse2/;
+
+    add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_idct32x32_1_add sse2/;
+
+    add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_iwht4x4_1_add/;
+
+    add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+    specialize qw/vp10_iwht4x4_16_add/;
+  }  # CONFIG_EMULATE_HARDWARE
+}  # CONFIG_VP9_HIGHBITDEPTH
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+  #fwd txfm
+  add_proto qw/void vp10_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
+  specialize qw/vp10_fwd_txfm2d_4x4 sse4_1/;
+  add_proto qw/void vp10_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
+  specialize qw/vp10_fwd_txfm2d_8x8 sse4_1/;
+  add_proto qw/void vp10_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
+  specialize qw/vp10_fwd_txfm2d_16x16 sse4_1/;
+  add_proto qw/void vp10_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
+  specialize qw/vp10_fwd_txfm2d_32x32 sse4_1/;
+  add_proto qw/void vp10_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
+  specialize qw/vp10_fwd_txfm2d_64x64 sse4_1/;
+
+  #inv txfm
+  add_proto qw/void vp10_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
+  specialize qw/vp10_inv_txfm2d_add_4x4 sse4_1/;
+  add_proto qw/void vp10_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
+  specialize qw/vp10_inv_txfm2d_add_8x8 sse4_1/;
+  add_proto qw/void vp10_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
+  specialize qw/vp10_inv_txfm2d_add_16x16 sse4_1/;
+  add_proto qw/void vp10_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
+  specialize qw/vp10_inv_txfm2d_add_32x32/;
+  add_proto qw/void vp10_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
+  specialize qw/vp10_inv_txfm2d_add_64x64/;
+}
+
+#
+# Motion search
+#
+add_proto qw/int vp10_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vpx_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
+specialize qw/vp10_full_search_sad sse3 sse4_1/;
+$vp10_full_search_sad_sse3=vp10_full_search_sadx3;
+$vp10_full_search_sad_sse4_1=vp10_full_search_sadx8;
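+# The sse3/sse4_1 entry points are bound to implementations with different
+# names: the sadx3/sadx8 suffixes indicate functions that evaluate 3 and 8
+# candidate positions per call, respectively.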
+
+add_proto qw/int vp10_diamond_search_sad/, "struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vpx_variance_vtable *fn_ptr, const struct mv *center_mv";
+specialize qw/vp10_diamond_search_sad/;
+
+add_proto qw/int vp10_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vpx_variance_vtable *fn_ptr, const struct mv *center_mv";
+specialize qw/vp10_full_range_search/;
+
+add_proto qw/void vp10_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+specialize qw/vp10_temporal_filter_apply sse2 msa/;
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+
+  # ENCODEMB INVOKE
+  if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+    add_proto qw/void highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/highbd_quantize_nuq/;
+
+    add_proto qw/void highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/highbd_quantize_fp_nuq/;
+
+    add_proto qw/void highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/highbd_quantize_32x32_nuq/;
+
+    add_proto qw/void highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/highbd_quantize_32x32_fp_nuq/;
+  }
+
+  add_proto qw/int64_t vp10_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
+  specialize qw/vp10_highbd_block_error sse2/;
+
+  if (vpx_config("CONFIG_AOM_QM") eq "yes") {
+    add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
+
+    add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
+  } else {
+    add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
+    specialize qw/vp10_highbd_quantize_fp sse4_1/;
+
+    add_proto qw/void vp10_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
+    specialize qw/vp10_highbd_quantize_b/;
+  }
+
+  # fdct functions
+  add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht4x4 sse4_1/;
+
+  add_proto qw/void vp10_highbd_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht4x8/;
+
+  add_proto qw/void vp10_highbd_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht8x4/;
+
+  add_proto qw/void vp10_highbd_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht8x16/;
+
+  add_proto qw/void vp10_highbd_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht16x8/;
+
+  add_proto qw/void vp10_highbd_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht16x32/;
+
+  add_proto qw/void vp10_highbd_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht32x16/;
+
+  add_proto qw/void vp10_highbd_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht8x8/;
+
+  add_proto qw/void vp10_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht16x16/;
+
+  add_proto qw/void vp10_highbd_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht32x32/;
+
+  add_proto qw/void vp10_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+  specialize qw/vp10_highbd_fwht4x4/;
+
+  add_proto qw/void vp10_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+  specialize qw/vp10_highbd_temporal_filter_apply/;
+
+}
+# End vp10_high encoder functions
+
+if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
+  add_proto qw/uint64_t vp10_wedge_sse_from_residuals/, "const int16_t *r1, const int16_t *d, const uint8_t *m, int N";
+  specialize qw/vp10_wedge_sse_from_residuals sse2/;
+  add_proto qw/int vp10_wedge_sign_from_residuals/, "const int16_t *ds, const uint8_t *m, int N, int64_t limit";
+  specialize qw/vp10_wedge_sign_from_residuals sse2/;
+  add_proto qw/void vp10_wedge_compute_delta_squares/, "int16_t *d, const int16_t *a, const int16_t *b, int N";
+  specialize qw/vp10_wedge_compute_delta_squares sse2/;
+}
+
+}
+# end encoder functions
+1;
diff --git a/av1/common/vp10_txfm.h b/av1/common/vp10_txfm.h
new file mode 100644
index 0000000..bfeb3ea
--- /dev/null
+++ b/av1/common/vp10_txfm.h
@@ -0,0 +1,207 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP10_TXFM_H_
+#define VP10_TXFM_H_
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "av1/common/enums.h"
+#include "aom/vpx_integer.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+static const int cos_bit_min = 10;
+static const int cos_bit_max = 16;
+
+// cospi_arr[i][j] = (int)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i)));
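+// For example, cospi_arr[0][32] = (int)round(cos(M_PI * 32 / 128) * (1 << 10))
+//                               = (int)round(0.7071 * 1024) = 724.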
+static const int32_t cospi_arr[7][64] = {
+  { 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009, 1004, 999, 993, 987, 980,
+    972, 964, 955, 946, 936, 926, 915, 903, 891, 878, 865, 851, 837, 822, 807,
+    792, 775, 759, 742, 724, 706, 688, 669, 650, 630, 610, 590, 569, 548, 526,
+    505, 483, 460, 438, 415, 392, 369, 345, 321, 297, 273, 249, 224, 200, 175,
+    150, 125, 100, 75, 50, 25 },
+  { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018, 2009, 1998, 1987, 1974,
+    1960, 1945, 1928, 1911, 1892, 1872, 1851, 1829, 1806, 1782, 1757, 1730,
+    1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483, 1448, 1412, 1375, 1338,
+    1299, 1260, 1220, 1179, 1138, 1096, 1053, 1009, 965, 921, 876, 830, 784,
+    737, 690, 642, 595, 546, 498, 449, 400, 350, 301, 251, 201, 151, 100, 50 },
+  { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973, 3948,
+    3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564, 3513, 3461,
+    3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, 2896, 2824, 2751, 2675,
+    2598, 2520, 2440, 2359, 2276, 2191, 2106, 2019, 1931, 1842, 1751, 1660,
+    1567, 1474, 1380, 1285, 1189, 1092, 995, 897, 799, 700, 601, 501, 401, 301,
+    201, 101 },
+  { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071, 8035, 7993, 7946, 7895,
+    7839, 7779, 7713, 7643, 7568, 7489, 7405, 7317, 7225, 7128, 7027, 6921,
+    6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933, 5793, 5649, 5501, 5351,
+    5197, 5040, 4880, 4717, 4551, 4383, 4212, 4038, 3862, 3683, 3503, 3320,
+    3135, 2948, 2760, 2570, 2378, 2185, 1990, 1795, 1598, 1401, 1202, 1003, 803,
+    603, 402, 201 },
+  { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143, 16069, 15986, 15893,
+    15791, 15679, 15557, 15426, 15286, 15137, 14978, 14811, 14635, 14449, 14256,
+    14053, 13842, 13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866, 11585,
+    11297, 11003, 10702, 10394, 10080, 9760, 9434, 9102, 8765, 8423, 8076, 7723,
+    7366, 7005, 6639, 6270, 5897, 5520, 5139, 4756, 4370, 3981, 3590, 3196,
+    2801, 2404, 2006, 1606, 1205, 804, 402 },
+  { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286, 32138, 31972, 31786,
+    31581, 31357, 31114, 30853, 30572, 30274, 29957, 29622, 29269, 28899, 28511,
+    28106, 27684, 27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732, 23170,
+    22595, 22006, 21403, 20788, 20160, 19520, 18868, 18205, 17531, 16846, 16151,
+    15447, 14733, 14010, 13279, 12540, 11793, 11039, 10279, 9512, 8740, 7962,
+    7180, 6393, 5602, 4808, 4011, 3212, 2411, 1608, 804 },
+  { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571, 64277, 63944, 63572,
+    63162, 62714, 62228, 61705, 61145, 60547, 59914, 59244, 58538, 57798, 57022,
+    56212, 55368, 54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464, 46341,
+    45190, 44011, 42806, 41576, 40320, 39040, 37736, 36410, 35062, 33692, 32303,
+    30893, 29466, 28020, 26558, 25080, 23586, 22078, 20557, 19024, 17479, 15924,
+    14359, 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608 }
+};
+
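+// Divides by 1 << bit with rounding, e.g. round_shift(10, 2) == 3 and
+// round_shift(-10, 2) == -2; ties round toward +infinity because the half
+// is added before the arithmetic right shift.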
+static INLINE int32_t round_shift(int32_t value, int bit) {
+  return (value + (1 << (bit - 1))) >> bit;
+}
+
+static INLINE void round_shift_array(int32_t *arr, int size, int bit) {
+  int i;
+  if (bit == 0) return;
+  if (bit > 0) {
+    for (i = 0; i < size; i++) {
+      arr[i] = round_shift(arr[i], bit);
+    }
+  } else {
+    // A negative bit means scaling up: shift left by -bit.
+    for (i = 0; i < size; i++) {
+      arr[i] = arr[i] << (-bit);
+    }
+  }
+}
+
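+// Computes one output of a butterfly stage ("half butterfly"): the weighted
+// sum w0 * in0 + w1 * in1 (weights typically drawn from cospi_arr), rounded
+// by `bit` bits. The optional range check reports any 32-bit overflow of the
+// sum.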
+static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
+                               int bit) {
+  int32_t result_32 = w0 * in0 + w1 * in1;
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+  int64_t result_64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1;
+  if (result_32 != result_64) {
+    printf("%s %d overflow result_32: %d result_64: %" PRId64
+           " w0: %d in0: %d w1: %d in1: "
+           "%d\n",
+           __FILE__, __LINE__, result_32, result_64, w0, in0, w1, in1);
+    assert(0 && "half_btf overflow");
+  }
+#endif
+  return round_shift(result_32, bit);
+}
+
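+// Returns floor(log2(x)) for x > 0 (e.g. get_max_bit(8) == 3) and -1 for
+// x == 0.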
+static INLINE int get_max_bit(int x) {
+  int max_bit = -1;
+  while (x) {
+    x = x >> 1;
+    max_bit++;
+  }
+  return max_bit;
+}
+
+// TODO(angiebird): implement SSE
+static INLINE void clamp_block(int16_t *block, int block_size, int stride,
+                               int low, int high) {
+  int i, j;
+  for (i = 0; i < block_size; ++i) {
+    for (j = 0; j < block_size; ++j) {
+      block[i * stride + j] = clamp(block[i * stride + j], low, high);
+    }
+  }
+}
+
+typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
+                         const int8_t *cos_bit, const int8_t *stage_range);
+
+typedef enum TXFM_TYPE {
+  TXFM_TYPE_DCT4,
+  TXFM_TYPE_DCT8,
+  TXFM_TYPE_DCT16,
+  TXFM_TYPE_DCT32,
+  TXFM_TYPE_DCT64,
+  TXFM_TYPE_ADST4,
+  TXFM_TYPE_ADST8,
+  TXFM_TYPE_ADST16,
+  TXFM_TYPE_ADST32,
+} TXFM_TYPE;
+
+typedef struct TXFM_2D_CFG {
+  const int txfm_size;
+  const int stage_num_col;
+  const int stage_num_row;
+
+  const int8_t *shift;
+  const int8_t *stage_range_col;
+  const int8_t *stage_range_row;
+  const int8_t *cos_bit_col;
+  const int8_t *cos_bit_row;
+  const TXFM_TYPE txfm_type_col;
+  const TXFM_TYPE txfm_type_row;
+} TXFM_2D_CFG;
+
+typedef struct TXFM_2D_FLIP_CFG {
+  int ud_flip;  // flip upside down
+  int lr_flip;  // flip left to right
+  const TXFM_2D_CFG *cfg;
+} TXFM_2D_FLIP_CFG;
+
+static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      cfg->ud_flip = 0;
+      cfg->lr_flip = 0;
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+      cfg->ud_flip = 1;
+      cfg->lr_flip = 0;
+      break;
+    case DCT_FLIPADST:
+      cfg->ud_flip = 0;
+      cfg->lr_flip = 1;
+      break;
+    case FLIPADST_FLIPADST:
+      cfg->ud_flip = 1;
+      cfg->lr_flip = 1;
+      break;
+    case ADST_FLIPADST:
+      cfg->ud_flip = 0;
+      cfg->lr_flip = 1;
+      break;
+    case FLIPADST_ADST:
+      cfg->ud_flip = 1;
+      cfg->lr_flip = 0;
+      break;
+#endif  // CONFIG_EXT_TX
+    default:
+      cfg->ud_flip = 0;
+      cfg->lr_flip = 0;
+      assert(0);
+  }
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_cfg(int tx_type, int tx_size);
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_64x64_cfg(int tx_type);
+#ifdef __cplusplus
+}
+#endif  // __cplusplus
+
+#endif  // VP10_TXFM_H_
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
new file mode 100644
index 0000000..5f76453
--- /dev/null
+++ b/av1/common/warped_motion.c
@@ -0,0 +1,642 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be
+ *  found in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include <assert.h>
+
+#include "av1/common/warped_motion.h"
+
+static ProjectPointsType get_project_points_type(TransformationType type) {
+  switch (type) {
+    case HOMOGRAPHY: return projectPointsHomography;
+    case AFFINE: return projectPointsAffine;
+    case ROTZOOM: return projectPointsRotZoom;
+    case TRANSLATION: return projectPointsTranslation;
+    default: assert(0); return NULL;
+  }
+}
+
+void projectPointsTranslation(int *mat, int *points, int *proj, const int n,
+                              const int stride_points, const int stride_proj,
+                              const int subsampling_x,
+                              const int subsampling_y) {
+  int i;
+  for (i = 0; i < n; ++i) {
+    const int x = *(points++), y = *(points++);
+    if (subsampling_x)
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+          ((x << (WARPEDMODEL_PREC_BITS + 1)) + mat[0]),
+          WARPEDDIFF_PREC_BITS + 1);
+    else
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+          ((x << WARPEDMODEL_PREC_BITS)) + mat[0], WARPEDDIFF_PREC_BITS);
+    if (subsampling_y)
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+          ((y << (WARPEDMODEL_PREC_BITS + 1)) + mat[1]),
+          WARPEDDIFF_PREC_BITS + 1);
+    else
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+          ((y << WARPEDMODEL_PREC_BITS)) + mat[1], WARPEDDIFF_PREC_BITS);
+    points += stride_points - 2;
+    proj += stride_proj - 2;
+  }
+}
+
+void projectPointsRotZoom(int *mat, int *points, int *proj, const int n,
+                          const int stride_points, const int stride_proj,
+                          const int subsampling_x, const int subsampling_y) {
+  int i;
+  for (i = 0; i < n; ++i) {
+    const int x = *(points++), y = *(points++);
+    if (subsampling_x)
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+          mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
+              (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
+          WARPEDDIFF_PREC_BITS + 1);
+    else
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
+                                            WARPEDDIFF_PREC_BITS);
+    if (subsampling_y)
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+          -mat[3] * 2 * x + mat[2] * 2 * y + mat[1] +
+              (-mat[3] + mat[2] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
+          WARPEDDIFF_PREC_BITS + 1);
+    else
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(-mat[3] * x + mat[2] * y + mat[1],
+                                            WARPEDDIFF_PREC_BITS);
+    points += stride_points - 2;
+    proj += stride_proj - 2;
+  }
+}
+
+void projectPointsAffine(int *mat, int *points, int *proj, const int n,
+                         const int stride_points, const int stride_proj,
+                         const int subsampling_x, const int subsampling_y) {
+  int i;
+  for (i = 0; i < n; ++i) {
+    const int x = *(points++), y = *(points++);
+    if (subsampling_x)
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+          mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
+              (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
+          WARPEDDIFF_PREC_BITS + 1);
+    else
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
+                                            WARPEDDIFF_PREC_BITS);
+    if (subsampling_y)
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+          mat[4] * 2 * x + mat[5] * 2 * y + mat[1] +
+              (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
+          WARPEDDIFF_PREC_BITS + 1);
+    else
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[4] * x + mat[5] * y + mat[1],
+                                            WARPEDDIFF_PREC_BITS);
+    points += stride_points - 2;
+    proj += stride_proj - 2;
+  }
+}
+
+void projectPointsHomography(int *mat, int *points, int *proj, const int n,
+                             const int stride_points, const int stride_proj,
+                             const int subsampling_x, const int subsampling_y) {
+  int i;
+  int64_t x, y, Z;
+  int64_t xp, yp;
+  for (i = 0; i < n; ++i) {
+    x = *(points++), y = *(points++);
+    x = (subsampling_x ? 4 * x + 1 : 2 * x);
+    y = (subsampling_y ? 4 * y + 1 : 2 * y);
+
+    Z = (mat[6] * x + mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
+    xp = (mat[0] * x + mat[1] * y + 2 * mat[2])
+         << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
+             WARPEDMODEL_PREC_BITS);
+    yp = (mat[3] * x + mat[4] * y + 2 * mat[5])
+         << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
+             WARPEDMODEL_PREC_BITS);
+
+    xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
+    yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
+
+    if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
+    if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
+    *(proj++) = xp;
+    *(proj++) = yp;
+
+    points += stride_points - 2;
+    proj += stride_proj - 2;
+  }
+}
+
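+// filter_4tap is currently referenced only by the commented-out
+// do_4tap_filter() further below.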
+static const int16_t filter_4tap[WARPEDPIXEL_PREC_SHIFTS][4] = {
+  { 0, 128, 0, 0 },     { -1, 127, 2, 0 },    { -2, 127, 4, -1 },
+  { -3, 126, 6, -1 },   { -3, 125, 8, -2 },   { -4, 124, 11, -3 },
+  { -5, 123, 13, -3 },  { -5, 121, 15, -3 },  { -6, 120, 18, -4 },
+  { -7, 119, 20, -4 },  { -7, 118, 22, -5 },  { -8, 116, 25, -5 },
+  { -8, 115, 27, -6 },  { -9, 113, 30, -6 },  { -9, 112, 32, -7 },
+  { -9, 110, 34, -7 },  { -10, 108, 37, -7 }, { -10, 107, 39, -8 },
+  { -10, 105, 41, -8 }, { -11, 103, 44, -8 }, { -11, 101, 47, -9 },
+  { -11, 99, 49, -9 },  { -11, 97, 51, -9 },  { -11, 95, 54, -10 },
+  { -11, 93, 56, -10 }, { -12, 91, 59, -10 }, { -12, 89, 61, -10 },
+  { -12, 87, 64, -11 }, { -12, 85, 66, -11 }, { -12, 82, 69, -11 },
+  { -12, 80, 71, -11 }, { -12, 78, 73, -11 }, { -11, 75, 75, -11 },
+  { -11, 73, 78, -12 }, { -11, 71, 80, -12 }, { -11, 69, 82, -12 },
+  { -11, 66, 85, -12 }, { -11, 64, 87, -12 }, { -10, 61, 89, -12 },
+  { -10, 59, 91, -12 }, { -10, 56, 93, -11 }, { -10, 54, 95, -11 },
+  { -9, 51, 97, -11 },  { -9, 49, 99, -11 },  { -9, 47, 101, -11 },
+  { -8, 44, 103, -11 }, { -8, 41, 105, -10 }, { -8, 39, 107, -10 },
+  { -7, 37, 108, -10 }, { -7, 34, 110, -9 },  { -7, 32, 112, -9 },
+  { -6, 30, 113, -9 },  { -6, 27, 115, -8 },  { -5, 25, 116, -8 },
+  { -5, 22, 118, -7 },  { -4, 20, 119, -7 },  { -4, 18, 120, -6 },
+  { -3, 15, 121, -5 },  { -3, 13, 123, -5 },  { -3, 11, 124, -4 },
+  { -2, 8, 125, -3 },   { -1, 6, 126, -3 },   { -1, 4, 127, -2 },
+  { 0, 2, 127, -1 },
+};
+
+static const int16_t
+    filter_ntap[WARPEDPIXEL_PREC_SHIFTS][WARPEDPIXEL_FILTER_TAPS] = {
+      { 0, 0, 128, 0, 0, 0 },      { 0, -1, 128, 2, -1, 0 },
+      { 1, -3, 127, 4, -1, 0 },    { 1, -4, 126, 6, -2, 1 },
+      { 1, -5, 126, 8, -3, 1 },    { 1, -6, 125, 11, -4, 1 },
+      { 1, -7, 124, 13, -4, 1 },   { 2, -8, 123, 15, -5, 1 },
+      { 2, -9, 122, 18, -6, 1 },   { 2, -10, 121, 20, -6, 1 },
+      { 2, -11, 120, 22, -7, 2 },  { 2, -12, 119, 25, -8, 2 },
+      { 3, -13, 117, 27, -8, 2 },  { 3, -13, 116, 29, -9, 2 },
+      { 3, -14, 114, 32, -10, 3 }, { 3, -15, 113, 35, -10, 2 },
+      { 3, -15, 111, 37, -11, 3 }, { 3, -16, 109, 40, -11, 3 },
+      { 3, -16, 108, 42, -12, 3 }, { 4, -17, 106, 45, -13, 3 },
+      { 4, -17, 104, 47, -13, 3 }, { 4, -17, 102, 50, -14, 3 },
+      { 4, -17, 100, 52, -14, 3 }, { 4, -18, 98, 55, -15, 4 },
+      { 4, -18, 96, 58, -15, 3 },  { 4, -18, 94, 60, -16, 4 },
+      { 4, -18, 91, 63, -16, 4 },  { 4, -18, 89, 65, -16, 4 },
+      { 4, -18, 87, 68, -17, 4 },  { 4, -18, 85, 70, -17, 4 },
+      { 4, -18, 82, 73, -17, 4 },  { 4, -18, 80, 75, -17, 4 },
+      { 4, -18, 78, 78, -18, 4 },  { 4, -17, 75, 80, -18, 4 },
+      { 4, -17, 73, 82, -18, 4 },  { 4, -17, 70, 85, -18, 4 },
+      { 4, -17, 68, 87, -18, 4 },  { 4, -16, 65, 89, -18, 4 },
+      { 4, -16, 63, 91, -18, 4 },  { 4, -16, 60, 94, -18, 4 },
+      { 3, -15, 58, 96, -18, 4 },  { 4, -15, 55, 98, -18, 4 },
+      { 3, -14, 52, 100, -17, 4 }, { 3, -14, 50, 102, -17, 4 },
+      { 3, -13, 47, 104, -17, 4 }, { 3, -13, 45, 106, -17, 4 },
+      { 3, -12, 42, 108, -16, 3 }, { 3, -11, 40, 109, -16, 3 },
+      { 3, -11, 37, 111, -15, 3 }, { 2, -10, 35, 113, -15, 3 },
+      { 3, -10, 32, 114, -14, 3 }, { 2, -9, 29, 116, -13, 3 },
+      { 2, -8, 27, 117, -13, 3 },  { 2, -8, 25, 119, -12, 2 },
+      { 2, -7, 22, 120, -11, 2 },  { 1, -6, 20, 121, -10, 2 },
+      { 1, -6, 18, 122, -9, 2 },   { 1, -5, 15, 123, -8, 2 },
+      { 1, -4, 13, 124, -7, 1 },   { 1, -4, 11, 125, -6, 1 },
+      { 1, -3, 8, 126, -5, 1 },    { 1, -2, 6, 126, -4, 1 },
+      { 0, -1, 4, 127, -3, 1 },    { 0, -1, 2, 128, -1, 0 },
+    };
+
+static int32_t do_ntap_filter(int32_t *p, int x) {
+  int i;
+  int32_t sum = 0;
+  for (i = 0; i < WARPEDPIXEL_FILTER_TAPS; ++i) {
+    sum += p[i - WARPEDPIXEL_FILTER_TAPS / 2 + 1] * filter_ntap[x][i];
+  }
+  return sum;
+}
+
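+// Evaluates the Catmull-Rom cubic through p[-1]..p[2] at
+// t = x / (1 << WARPEDPIXEL_PREC_BITS):
+//   f(t) = (2*p[0] + (p[1] - p[-1])*t
+//           + (2*p[-1] - 5*p[0] + 4*p[1] - p[2])*t^2
+//           + (3*(p[0] - p[1]) + p[2] - p[-1])*t^3) / 2,
+// with the result scaled by 1 << WARPEDPIXEL_FILTER_BITS to match
+// do_ntap_filter().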
+static int32_t do_cubic_filter(int32_t *p, int x) {
+  if (x == 0) {
+    return p[0];
+  } else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
+    return p[1];
+  } else {
+    const int64_t v1 = x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
+    const int64_t v2 = x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
+    const int64_t v3 = x * (p[1] - p[-1]);
+    const int64_t v4 = 2 * p[0];
+    return (int32_t)ROUND_POWER_OF_TWO_SIGNED(
+        (v4 << (3 * WARPEDPIXEL_PREC_BITS)) +
+            (v3 << (2 * WARPEDPIXEL_PREC_BITS)) +
+            (v2 << WARPEDPIXEL_PREC_BITS) + v1,
+        3 * WARPEDPIXEL_PREC_BITS + 1 - WARPEDPIXEL_FILTER_BITS);
+  }
+}
+
+/*
+static int32_t do_linear_filter(int32_t *p, int x) {
+  int32_t sum = 0;
+  sum = p[0] * (WARPEDPIXEL_PREC_SHIFTS - x) + p[1] * x;
+  sum <<= (WARPEDPIXEL_FILTER_BITS - WARPEDPIXEL_PREC_BITS);
+  return sum;
+}
+
+static int32_t do_4tap_filter(int32_t *p, int x) {
+  int i;
+  int32_t sum = 0;
+  for (i = 0; i < 4; ++i) {
+    sum += p[i - 1] * filter_4tap[x][i];
+  }
+  return sum;
+}
+*/
+
+static INLINE void get_subcolumn(int taps, uint8_t *ref, int32_t *col,
+                                 int stride, int x, int y_start) {
+  int i;
+  for (i = 0; i < taps; ++i) {
+    col[i] = ref[(i + y_start) * stride + x];
+  }
+}
+
+static uint8_t bi_ntap_filter(uint8_t *ref, int x, int y, int stride) {
+  int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
+  int k;
+  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
+  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
+  for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
+    int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
+    get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
+                  i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
+                  j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
+    arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
+                            y - (j << WARPEDPIXEL_PREC_BITS));
+  }
+  val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
+                       x - (i << WARPEDPIXEL_PREC_BITS));
+  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
+  return (uint8_t)clip_pixel(val);
+}
+
+static uint8_t bi_cubic_filter(uint8_t *ref, int x, int y, int stride) {
+  int32_t val, arr[4];
+  int k;
+  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
+  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
+  for (k = 0; k < 4; ++k) {
+    int32_t arr_temp[4];
+    get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
+    arr[k] = do_cubic_filter(arr_temp + 1, y - (j << WARPEDPIXEL_PREC_BITS));
+  }
+  val = do_cubic_filter(arr + 1, x - (i << WARPEDPIXEL_PREC_BITS));
+  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
+  return (uint8_t)clip_pixel(val);
+}
+
+static uint8_t bi_linear_filter(uint8_t *ref, int x, int y, int stride) {
+  const int ix = x >> WARPEDPIXEL_PREC_BITS;
+  const int iy = y >> WARPEDPIXEL_PREC_BITS;
+  const int sx = x - (ix << WARPEDPIXEL_PREC_BITS);
+  const int sy = y - (iy << WARPEDPIXEL_PREC_BITS);
+  int32_t val;
+  val = ROUND_POWER_OF_TWO_SIGNED(
+      ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
+              (WARPEDPIXEL_PREC_SHIFTS - sx) +
+          ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
+          ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
+          ref[(iy + 1) * stride + ix + 1] * sy * sx,
+      WARPEDPIXEL_PREC_BITS * 2);
+  return (uint8_t)clip_pixel(val);
+}
+
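+// Interpolation degrades gracefully near the frame border: positions beyond
+// a corner clamp to the corner pixel, positions beyond a single edge
+// interpolate along that edge, interior positions use the full n-tap filter,
+// and positions just inside the border fall back to the cubic and then the
+// bilinear filter.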
+static uint8_t warp_interpolate(uint8_t *ref, int x, int y, int width,
+                                int height, int stride) {
+  int ix = x >> WARPEDPIXEL_PREC_BITS;
+  int iy = y >> WARPEDPIXEL_PREC_BITS;
+  int sx = x - (ix << WARPEDPIXEL_PREC_BITS);
+  int sy = y - (iy << WARPEDPIXEL_PREC_BITS);
+  int32_t v;
+
+  if (ix < 0 && iy < 0)
+    return ref[0];
+  else if (ix < 0 && iy > height - 1)
+    return ref[(height - 1) * stride];
+  else if (ix > width - 1 && iy < 0)
+    return ref[width - 1];
+  else if (ix > width - 1 && iy > height - 1)
+    return ref[(height - 1) * stride + (width - 1)];
+  else if (ix < 0) {
+    v = ROUND_POWER_OF_TWO_SIGNED(
+        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
+            ref[(iy + 1) * stride] * sy,
+        WARPEDPIXEL_PREC_BITS);
+    return clip_pixel(v);
+  } else if (iy < 0) {
+    v = ROUND_POWER_OF_TWO_SIGNED(
+        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
+        WARPEDPIXEL_PREC_BITS);
+    return clip_pixel(v);
+  } else if (ix > width - 1) {
+    v = ROUND_POWER_OF_TWO_SIGNED(
+        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
+            ref[(iy + 1) * stride + width - 1] * sy,
+        WARPEDPIXEL_PREC_BITS);
+    return clip_pixel(v);
+  } else if (iy > height - 1) {
+    v = ROUND_POWER_OF_TWO_SIGNED(
+        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
+            ref[(height - 1) * stride + ix + 1] * sx,
+        WARPEDPIXEL_PREC_BITS);
+    return clip_pixel(v);
+  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
+             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
+             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
+             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
+    return bi_ntap_filter(ref, x, y, stride);
+  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
+    return bi_cubic_filter(ref, x, y, stride);
+  } else {
+    return bi_linear_filter(ref, x, y, stride);
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE void highbd_get_subcolumn(int taps, uint16_t *ref, int32_t *col,
+                                        int stride, int x, int y_start) {
+  int i;
+  for (i = 0; i < taps; ++i) {
+    col[i] = ref[(i + y_start) * stride + x];
+  }
+}
+
+static uint16_t highbd_bi_ntap_filter(uint16_t *ref, int x, int y, int stride,
+                                      int bd) {
+  int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
+  int k;
+  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
+  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
+  for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
+    int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
+    highbd_get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
+                         i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
+                         j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
+    arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
+                            y - (j << WARPEDPIXEL_PREC_BITS));
+  }
+  val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
+                       x - (i << WARPEDPIXEL_PREC_BITS));
+  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
+  return (uint16_t)clip_pixel_highbd(val, bd);
+}
+
+static uint16_t highbd_bi_cubic_filter(uint16_t *ref, int x, int y, int stride,
+                                       int bd) {
+  int32_t val, arr[4];
+  int k;
+  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
+  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
+  for (k = 0; k < 4; ++k) {
+    int32_t arr_temp[4];
+    highbd_get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
+    arr[k] = do_cubic_filter(arr_temp + 1, y - (j << WARPEDPIXEL_PREC_BITS));
+  }
+  val = do_cubic_filter(arr + 1, x - (i << WARPEDPIXEL_PREC_BITS));
+  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
+  return (uint16_t)clip_pixel_highbd(val, bd);
+}
+
+static uint16_t highbd_bi_linear_filter(uint16_t *ref, int x, int y, int stride,
+                                        int bd) {
+  const int ix = x >> WARPEDPIXEL_PREC_BITS;
+  const int iy = y >> WARPEDPIXEL_PREC_BITS;
+  const int sx = x - (ix << WARPEDPIXEL_PREC_BITS);
+  const int sy = y - (iy << WARPEDPIXEL_PREC_BITS);
+  int32_t val;
+  val = ROUND_POWER_OF_TWO_SIGNED(
+      ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
+              (WARPEDPIXEL_PREC_SHIFTS - sx) +
+          ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
+          ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
+          ref[(iy + 1) * stride + ix + 1] * sy * sx,
+      WARPEDPIXEL_PREC_BITS * 2);
+  return (uint16_t)clip_pixel_highbd(val, bd);
+}
+
+static uint16_t highbd_warp_interpolate(uint16_t *ref, int x, int y, int width,
+                                        int height, int stride, int bd) {
+  int ix = x >> WARPEDPIXEL_PREC_BITS;
+  int iy = y >> WARPEDPIXEL_PREC_BITS;
+  int sx = x - (ix << WARPEDPIXEL_PREC_BITS);
+  int sy = y - (iy << WARPEDPIXEL_PREC_BITS);
+  int32_t v;
+
+  if (ix < 0 && iy < 0)
+    return ref[0];
+  else if (ix < 0 && iy > height - 1)
+    return ref[(height - 1) * stride];
+  else if (ix > width - 1 && iy < 0)
+    return ref[width - 1];
+  else if (ix > width - 1 && iy > height - 1)
+    return ref[(height - 1) * stride + (width - 1)];
+  else if (ix < 0) {
+    v = ROUND_POWER_OF_TWO_SIGNED(
+        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
+            ref[(iy + 1) * stride] * sy,
+        WARPEDPIXEL_PREC_BITS);
+    return clip_pixel_highbd(v, bd);
+  } else if (iy < 0) {
+    v = ROUND_POWER_OF_TWO_SIGNED(
+        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
+        WARPEDPIXEL_PREC_BITS);
+    return clip_pixel_highbd(v, bd);
+  } else if (ix > width - 1) {
+    v = ROUND_POWER_OF_TWO_SIGNED(
+        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
+            ref[(iy + 1) * stride + width - 1] * sy,
+        WARPEDPIXEL_PREC_BITS);
+    return clip_pixel_highbd(v, bd);
+  } else if (iy > height - 1) {
+    v = ROUND_POWER_OF_TWO_SIGNED(
+        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
+            ref[(height - 1) * stride + ix + 1] * sx,
+        WARPEDPIXEL_PREC_BITS);
+    return clip_pixel_highbd(v, bd);
+  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
+             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
+             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
+             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
+    return highbd_bi_ntap_filter(ref, x, y, stride, bd);
+  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
+    return highbd_bi_cubic_filter(ref, x, y, stride, bd);
+  } else {
+    return highbd_bi_linear_filter(ref, x, y, stride, bd);
+  }
+}
+
+static double highbd_warp_erroradv(WarpedMotionParams *wm, uint8_t *ref8,
+                                   int width, int height, int stride,
+                                   uint8_t *dst8, int p_col, int p_row,
+                                   int p_width, int p_height, int p_stride,
+                                   int subsampling_x, int subsampling_y,
+                                   int x_scale, int y_scale, int bd) {
+  int i, j;
+  ProjectPointsType projectpoints = get_project_points_type(wm->wmtype);
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  int gm_err = 0, no_gm_err = 0;
+  int gm_sumerr = 0, no_gm_sumerr = 0;
+  for (i = p_row; i < p_row + p_height; ++i) {
+    for (j = p_col; j < p_col + p_width; ++j) {
+      int in[2], out[2];
+      in[0] = j;
+      in[1] = i;
+      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
+      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
+      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
+      gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
+               highbd_warp_interpolate(ref, out[0], out[1], width, height,
+                                       stride, bd);
+      no_gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
+                  ref[(j - p_col) + (i - p_row) * stride];
+      gm_sumerr += gm_err * gm_err;
+      no_gm_sumerr += no_gm_err * no_gm_err;
+    }
+  }
+  return (double)gm_sumerr / no_gm_sumerr;
+}
+
+static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
+                              int height, int stride, uint8_t *pred8, int p_col,
+                              int p_row, int p_width, int p_height,
+                              int p_stride, int subsampling_x,
+                              int subsampling_y, int x_scale, int y_scale,
+                              int bd) {
+  int i, j;
+  ProjectPointsType projectpoints = get_project_points_type(wm->wmtype);
+  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  if (projectpoints == NULL) return;
+  for (i = p_row; i < p_row + p_height; ++i) {
+    for (j = p_col; j < p_col + p_width; ++j) {
+      int in[2], out[2];
+      in[0] = j;
+      in[1] = i;
+      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
+      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
+      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
+      pred[(j - p_col) + (i - p_row) * p_stride] = highbd_warp_interpolate(
+          ref, out[0], out[1], width, height, stride, bd);
+    }
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
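+// Returns the ratio of the warped-prediction SSE to the SSE of reusing the
+// co-located reference pixels directly; values below 1 mean the warped model
+// is an improvement. Note the division is ill-defined if no_gm_sumerr is 0.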
+static double warp_erroradv(WarpedMotionParams *wm, uint8_t *ref, int width,
+                            int height, int stride, uint8_t *dst, int p_col,
+                            int p_row, int p_width, int p_height, int p_stride,
+                            int subsampling_x, int subsampling_y, int x_scale,
+                            int y_scale) {
+  int gm_err = 0, no_gm_err = 0;
+  int gm_sumerr = 0, no_gm_sumerr = 0;
+  int i, j;
+  ProjectPointsType projectpoints = get_project_points_type(wm->wmtype);
+  for (i = p_row; i < p_row + p_height; ++i) {
+    for (j = p_col; j < p_col + p_width; ++j) {
+      int in[2], out[2];
+      in[0] = j;
+      in[1] = i;
+      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
+      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
+      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
+      gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
+               warp_interpolate(ref, out[0], out[1], width, height, stride);
+      no_gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
+                  ref[(j - p_col) + (i - p_row) * stride];
+      gm_sumerr += gm_err * gm_err;
+      no_gm_sumerr += no_gm_err * no_gm_err;
+    }
+  }
+  return (double)gm_sumerr / no_gm_sumerr;
+}
+
+static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,
+                       int height, int stride, uint8_t *pred, int p_col,
+                       int p_row, int p_width, int p_height, int p_stride,
+                       int subsampling_x, int subsampling_y, int x_scale,
+                       int y_scale) {
+  int i, j;
+  ProjectPointsType projectpoints = get_project_points_type(wm->wmtype);
+  if (projectpoints == NULL) return;
+  for (i = p_row; i < p_row + p_height; ++i) {
+    for (j = p_col; j < p_col + p_width; ++j) {
+      int in[2], out[2];
+      in[0] = j;
+      in[1] = i;
+      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
+      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
+      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
+      pred[(j - p_col) + (i - p_row) * p_stride] =
+          warp_interpolate(ref, out[0], out[1], width, height, stride);
+    }
+  }
+}
+
+double vp10_warp_erroradv(WarpedMotionParams *wm,
+#if CONFIG_VP9_HIGHBITDEPTH
+                          int use_hbd, int bd,
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+                          uint8_t *ref, int width, int height, int stride,
+                          uint8_t *dst, int p_col, int p_row, int p_width,
+                          int p_height, int p_stride, int subsampling_x,
+                          int subsampling_y, int x_scale, int y_scale) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (use_hbd)
+    return highbd_warp_erroradv(
+        wm, ref, width, height, stride, dst, p_col, p_row, p_width, p_height,
+        p_stride, subsampling_x, subsampling_y, x_scale, y_scale, bd);
+  else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    return warp_erroradv(wm, ref, width, height, stride, dst, p_col, p_row,
+                         p_width, p_height, p_stride, subsampling_x,
+                         subsampling_y, x_scale, y_scale);
+}
+
+void vp10_warp_plane(WarpedMotionParams *wm,
+#if CONFIG_VP9_HIGHBITDEPTH
+                     int use_hbd, int bd,
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+                     uint8_t *ref, int width, int height, int stride,
+                     uint8_t *pred, int p_col, int p_row, int p_width,
+                     int p_height, int p_stride, int subsampling_x,
+                     int subsampling_y, int x_scale, int y_scale) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (use_hbd)
+    highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
+                      p_width, p_height, p_stride, subsampling_x, subsampling_y,
+                      x_scale, y_scale, bd);
+  else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
+               p_height, p_stride, subsampling_x, subsampling_y, x_scale,
+               y_scale);
+}
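+/* vp10_warp_plane() and vp10_warp_erroradv() are thin dispatchers: when
+ * built with CONFIG_VP9_HIGHBITDEPTH they select the 16-bit path at run
+ * time via use_hbd; otherwise only the 8-bit path is compiled in. */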
+
+void vp10_integerize_model(const double *model, TransformationType wmtype,
+                           WarpedMotionParams *wm) {
+  wm->wmtype = wmtype;
+  switch (wmtype) {
+    case HOMOGRAPHY:
+      assert(fabs(model[8] - 1.0) < 1e-12);
+      wm->wmmat[7] =
+          (int)lrint(model[7] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
+      wm->wmmat[6] =
+          (int)lrint(model[6] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
+    /* fallthrough intended */
+    case AFFINE:
+      wm->wmmat[5] = (int)lrint(model[5] * (1 << WARPEDMODEL_PREC_BITS));
+      wm->wmmat[4] = (int)lrint(model[4] * (1 << WARPEDMODEL_PREC_BITS));
+    /* fallthrough intended */
+    case ROTZOOM:
+      wm->wmmat[3] = (int)lrint(model[3] * (1 << WARPEDMODEL_PREC_BITS));
+      wm->wmmat[2] = (int)lrint(model[2] * (1 << WARPEDMODEL_PREC_BITS));
+    /* fallthrough intended */
+    case TRANSLATION:
+      wm->wmmat[1] = (int)lrint(model[1] * (1 << WARPEDMODEL_PREC_BITS));
+      wm->wmmat[0] = (int)lrint(model[0] * (1 << WARPEDMODEL_PREC_BITS));
+      break;
+    default: assert(0 && "Invalid TransformationType");
+  }
+}
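+/* Worked example (illustration only): with WARPEDMODEL_PREC_BITS == 8, an
+ * affine coefficient model[4] == 1.5 is stored as
+ * wm->wmmat[4] == lrint(1.5 * 256) == 384. The row-3 homography terms use
+ * the finer scale of 1 << WARPEDMODEL_ROW3HOMO_PREC_BITS == 4096. */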
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
new file mode 100644
index 0000000..a9c57f9
--- /dev/null
+++ b/av1/common/warped_motion.h
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be
+ *  found in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_WARPED_MOTION_H
+#define VP10_COMMON_WARPED_MOTION_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "aom_ports/mem.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+// Bits of precision used for the model
+#define WARPEDMODEL_PREC_BITS 8
+#define WARPEDMODEL_ROW3HOMO_PREC_BITS 12
+
+// Bits of subpel precision for warped interpolation
+#define WARPEDPIXEL_PREC_BITS 6
+#define WARPEDPIXEL_PREC_SHIFTS (1 << WARPEDPIXEL_PREC_BITS)
+
+// Number of taps in the n-tap interpolation filter
+#define WARPEDPIXEL_FILTER_TAPS 6
+
+// Precision of filter taps
+#define WARPEDPIXEL_FILTER_BITS 7
+
+#define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS)
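+/* With the values above, WARPEDDIFF_PREC_BITS == 8 - 6 == 2, i.e. one
+ * subpel position step corresponds to 1 << 2 == 4 units of model
+ * precision. */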
+
+typedef void (*ProjectPointsType)(int *mat, int *points, int *proj, const int n,
+                                  const int stride_points,
+                                  const int stride_proj,
+                                  const int subsampling_x,
+                                  const int subsampling_y);
+void projectPointsHomography(int *mat, int *points, int *proj, const int n,
+                             const int stride_points, const int stride_proj,
+                             const int subsampling_x, const int subsampling_y);
+void projectPointsAffine(int *mat, int *points, int *proj, const int n,
+                         const int stride_points, const int stride_proj,
+                         const int subsampling_x, const int subsampling_y);
+void projectPointsRotZoom(int *mat, int *points, int *proj, const int n,
+                          const int stride_points, const int stride_proj,
+                          const int subsampling_x, const int subsampling_y);
+void projectPointsTranslation(int *mat, int *points, int *proj, const int n,
+                              const int stride_points, const int stride_proj,
+                              const int subsampling_x, const int subsampling_y);
+
+typedef enum {
+  UNKNOWN_TRANSFORM = -1,
+  HOMOGRAPHY,   // homography, 8-parameter
+  AFFINE,       // affine, 6-parameter
+  ROTZOOM,      // simplified affine with rotation and zoom only, 4-parameter
+  TRANSLATION,  // translational motion, 2-parameter
+  TRANS_TYPES
+} TransformationType;
+
+// Number of parameters used by each transformation in TransformationType
+static const int n_trans_model_params[TRANS_TYPES] = { 9, 6, 4, 2 };
+
+typedef struct {
+  TransformationType wmtype;
+  int wmmat[8];  // For homography the ninth (unstored) parameter,
+                 // model[8], is assumed to be 1
+} WarpedMotionParams;
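+/* Minimal usage sketch (hypothetical values, for illustration only):
+ *
+ *   double model[9];        // produced by some global-motion estimator
+ *   WarpedMotionParams wm;
+ *   vp10_integerize_model(model, AFFINE, &wm);
+ *   // wm.wmmat[0..5] now hold the affine parameters in
+ *   // WARPEDMODEL_PREC_BITS fixed point, ready for vp10_warp_plane().
+ */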
+
+double vp10_warp_erroradv(WarpedMotionParams *wm,
+#if CONFIG_VP9_HIGHBITDEPTH
+                          int use_hbd, int bd,
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+                          uint8_t *ref, int width, int height, int stride,
+                          uint8_t *dst, int p_col, int p_row, int p_width,
+                          int p_height, int p_stride, int subsampling_x,
+                          int subsampling_y, int x_scale, int y_scale);
+
+void vp10_warp_plane(WarpedMotionParams *wm,
+#if CONFIG_VP9_HIGHBITDEPTH
+                     int use_hbd, int bd,
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+                     uint8_t *ref, int width, int height, int stride,
+                     uint8_t *pred, int p_col, int p_row, int p_width,
+                     int p_height, int p_stride, int subsampling_x,
+                     int subsampling_y, int x_scale, int y_scale);
+
+// Integerize model into the WarpedMotionParams structure
+void vp10_integerize_model(const double *model, TransformationType wmtype,
+                           WarpedMotionParams *wm);
+#endif  // VP10_COMMON_WARPED_MOTION_H
diff --git a/av1/common/x86/highbd_inv_txfm_sse4.c b/av1/common/x86/highbd_inv_txfm_sse4.c
new file mode 100644
index 0000000..f3686eb
--- /dev/null
+++ b/av1/common/x86/highbd_inv_txfm_sse4.c
@@ -0,0 +1,1398 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <smmintrin.h> /* SSE4.1 */
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "av1/common/vp10_inv_txfm2d_cfg.h"
+#include "av1/common/x86/highbd_txfm_utility_sse4.h"
+
+static INLINE void load_buffer_4x4(const int32_t *coeff, __m128i *in) {
+  in[0] = _mm_load_si128((const __m128i *)(coeff + 0));
+  in[1] = _mm_load_si128((const __m128i *)(coeff + 4));
+  in[2] = _mm_load_si128((const __m128i *)(coeff + 8));
+  in[3] = _mm_load_si128((const __m128i *)(coeff + 12));
+}
+
+static void idct4x4_sse4_1(__m128i *in, int bit) {
+  const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+  const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+  const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+  const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+  __m128i u0, u1, u2, u3;
+  __m128i v0, v1, v2, v3, x, y;
+
+  v0 = _mm_unpacklo_epi32(in[0], in[1]);
+  v1 = _mm_unpackhi_epi32(in[0], in[1]);
+  v2 = _mm_unpacklo_epi32(in[2], in[3]);
+  v3 = _mm_unpackhi_epi32(in[2], in[3]);
+
+  u0 = _mm_unpacklo_epi64(v0, v2);
+  u1 = _mm_unpackhi_epi64(v0, v2);
+  u2 = _mm_unpacklo_epi64(v1, v3);
+  u3 = _mm_unpackhi_epi64(v1, v3);
+
+  x = _mm_mullo_epi32(u0, cospi32);
+  y = _mm_mullo_epi32(u2, cospi32);
+  v0 = _mm_add_epi32(x, y);
+  v0 = _mm_add_epi32(v0, rnding);
+  v0 = _mm_srai_epi32(v0, bit);
+
+  v1 = _mm_sub_epi32(x, y);
+  v1 = _mm_add_epi32(v1, rnding);
+  v1 = _mm_srai_epi32(v1, bit);
+
+  x = _mm_mullo_epi32(u1, cospi48);
+  y = _mm_mullo_epi32(u3, cospim16);
+  v2 = _mm_add_epi32(x, y);
+  v2 = _mm_add_epi32(v2, rnding);
+  v2 = _mm_srai_epi32(v2, bit);
+
+  x = _mm_mullo_epi32(u1, cospi16);
+  y = _mm_mullo_epi32(u3, cospi48);
+  v3 = _mm_add_epi32(x, y);
+  v3 = _mm_add_epi32(v3, rnding);
+  v3 = _mm_srai_epi32(v3, bit);
+
+  in[0] = _mm_add_epi32(v0, v3);
+  in[1] = _mm_add_epi32(v1, v2);
+  in[2] = _mm_sub_epi32(v1, v2);
+  in[3] = _mm_sub_epi32(v0, v3);
+}
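+/* Each multiply/add/round/shift group above is one half of a butterfly: a
+ * 2-point rotation by cospi constants. Scalar sketch of a single 32-bit
+ * lane (illustration only):
+ *
+ *   v = (in0 * c0 + in1 * c1 + (1 << (bit - 1))) >> bit;
+ */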
+
+static void iadst4x4_sse4_1(__m128i *in, int bit) {
+  const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+  const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+  const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+  const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+  const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+  const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+  const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+  const __m128i zero = _mm_setzero_si128();
+  __m128i u0, u1, u2, u3;
+  __m128i v0, v1, v2, v3, x, y;
+
+  v0 = _mm_unpacklo_epi32(in[0], in[1]);
+  v1 = _mm_unpackhi_epi32(in[0], in[1]);
+  v2 = _mm_unpacklo_epi32(in[2], in[3]);
+  v3 = _mm_unpackhi_epi32(in[2], in[3]);
+
+  u0 = _mm_unpacklo_epi64(v0, v2);
+  u1 = _mm_unpackhi_epi64(v0, v2);
+  u2 = _mm_unpacklo_epi64(v1, v3);
+  u3 = _mm_unpackhi_epi64(v1, v3);
+
+  // stage 0
+  // stage 1
+  u1 = _mm_sub_epi32(zero, u1);
+  u3 = _mm_sub_epi32(zero, u3);
+
+  // stage 2
+  v0 = u0;
+  v1 = u3;
+  x = _mm_mullo_epi32(u1, cospi32);
+  y = _mm_mullo_epi32(u2, cospi32);
+  v2 = _mm_add_epi32(x, y);
+  v2 = _mm_add_epi32(v2, rnding);
+  v2 = _mm_srai_epi32(v2, bit);
+
+  v3 = _mm_sub_epi32(x, y);
+  v3 = _mm_add_epi32(v3, rnding);
+  v3 = _mm_srai_epi32(v3, bit);
+
+  // stage 3
+  u0 = _mm_add_epi32(v0, v2);
+  u1 = _mm_add_epi32(v1, v3);
+  u2 = _mm_sub_epi32(v0, v2);
+  u3 = _mm_sub_epi32(v1, v3);
+
+  // stage 4
+  x = _mm_mullo_epi32(u0, cospi8);
+  y = _mm_mullo_epi32(u1, cospi56);
+  in[3] = _mm_add_epi32(x, y);
+  in[3] = _mm_add_epi32(in[3], rnding);
+  in[3] = _mm_srai_epi32(in[3], bit);
+
+  x = _mm_mullo_epi32(u0, cospi56);
+  y = _mm_mullo_epi32(u1, cospim8);
+  in[0] = _mm_add_epi32(x, y);
+  in[0] = _mm_add_epi32(in[0], rnding);
+  in[0] = _mm_srai_epi32(in[0], bit);
+
+  x = _mm_mullo_epi32(u2, cospi40);
+  y = _mm_mullo_epi32(u3, cospi24);
+  in[1] = _mm_add_epi32(x, y);
+  in[1] = _mm_add_epi32(in[1], rnding);
+  in[1] = _mm_srai_epi32(in[1], bit);
+
+  x = _mm_mullo_epi32(u2, cospi24);
+  y = _mm_mullo_epi32(u3, cospim40);
+  in[2] = _mm_add_epi32(x, y);
+  in[2] = _mm_add_epi32(in[2], rnding);
+  in[2] = _mm_srai_epi32(in[2], bit);
+}
+
+static INLINE void round_shift_4x4(__m128i *in, int shift) {
+  __m128i rnding = _mm_set1_epi32(1 << (shift - 1));
+
+  in[0] = _mm_add_epi32(in[0], rnding);
+  in[1] = _mm_add_epi32(in[1], rnding);
+  in[2] = _mm_add_epi32(in[2], rnding);
+  in[3] = _mm_add_epi32(in[3], rnding);
+
+  in[0] = _mm_srai_epi32(in[0], shift);
+  in[1] = _mm_srai_epi32(in[1], shift);
+  in[2] = _mm_srai_epi32(in[2], shift);
+  in[3] = _mm_srai_epi32(in[3], shift);
+}
+
+static INLINE __m128i highbd_clamp_epi16(__m128i u, int bd) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i one = _mm_set1_epi16(1);
+  const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one);
+  __m128i clamped, mask;
+
+  mask = _mm_cmpgt_epi16(u, max);
+  clamped = _mm_andnot_si128(mask, u);
+  mask = _mm_and_si128(mask, max);
+  clamped = _mm_or_si128(mask, clamped);
+  mask = _mm_cmpgt_epi16(clamped, zero);
+  clamped = _mm_and_si128(clamped, mask);
+
+  return clamped;
+}
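+/* Scalar equivalent of the clamp above (illustration only):
+ *
+ *   pel = pel < 0 ? 0 : (pel > (1 << bd) - 1 ? (1 << bd) - 1 : pel);
+ */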
+
+static void write_buffer_4x4(__m128i *in, uint16_t *output, int stride,
+                             int fliplr, int flipud, int shift, int bd) {
+  const __m128i zero = _mm_setzero_si128();
+  __m128i u0, u1, u2, u3;
+  __m128i v0, v1, v2, v3;
+
+  round_shift_4x4(in, shift);
+
+  v0 = _mm_loadl_epi64((__m128i const *)(output + 0 * stride));
+  v1 = _mm_loadl_epi64((__m128i const *)(output + 1 * stride));
+  v2 = _mm_loadl_epi64((__m128i const *)(output + 2 * stride));
+  v3 = _mm_loadl_epi64((__m128i const *)(output + 3 * stride));
+
+  v0 = _mm_unpacklo_epi16(v0, zero);
+  v1 = _mm_unpacklo_epi16(v1, zero);
+  v2 = _mm_unpacklo_epi16(v2, zero);
+  v3 = _mm_unpacklo_epi16(v3, zero);
+
+  if (fliplr) {
+    in[0] = _mm_shuffle_epi32(in[0], 0x1B);
+    in[1] = _mm_shuffle_epi32(in[1], 0x1B);
+    in[2] = _mm_shuffle_epi32(in[2], 0x1B);
+    in[3] = _mm_shuffle_epi32(in[3], 0x1B);
+  }
+
+  if (flipud) {
+    u0 = _mm_add_epi32(in[3], v0);
+    u1 = _mm_add_epi32(in[2], v1);
+    u2 = _mm_add_epi32(in[1], v2);
+    u3 = _mm_add_epi32(in[0], v3);
+  } else {
+    u0 = _mm_add_epi32(in[0], v0);
+    u1 = _mm_add_epi32(in[1], v1);
+    u2 = _mm_add_epi32(in[2], v2);
+    u3 = _mm_add_epi32(in[3], v3);
+  }
+
+  v0 = _mm_packus_epi32(u0, u1);
+  v2 = _mm_packus_epi32(u2, u3);
+
+  u0 = highbd_clamp_epi16(v0, bd);
+  u2 = highbd_clamp_epi16(v2, bd);
+
+  v0 = _mm_unpacklo_epi64(u0, u0);
+  v1 = _mm_unpackhi_epi64(u0, u0);
+  v2 = _mm_unpacklo_epi64(u2, u2);
+  v3 = _mm_unpackhi_epi64(u2, u2);
+
+  _mm_storel_epi64((__m128i *)(output + 0 * stride), v0);
+  _mm_storel_epi64((__m128i *)(output + 1 * stride), v1);
+  _mm_storel_epi64((__m128i *)(output + 2 * stride), v2);
+  _mm_storel_epi64((__m128i *)(output + 3 * stride), v3);
+}
+
+void vp10_inv_txfm2d_add_4x4_sse4_1(const int32_t *coeff, uint16_t *output,
+                                    int stride, int tx_type, int bd) {
+  __m128i in[4];
+  const TXFM_2D_CFG *cfg = NULL;
+
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_4;
+      load_buffer_4x4(coeff, in);
+      idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+    case ADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_4;
+      load_buffer_4x4(coeff, in);
+      idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+    case DCT_ADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_4;
+      load_buffer_4x4(coeff, in);
+      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+    case ADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_4;
+      load_buffer_4x4(coeff, in);
+      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_4;
+      load_buffer_4x4(coeff, in);
+      idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      write_buffer_4x4(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      break;
+    case DCT_FLIPADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_4;
+      load_buffer_4x4(coeff, in);
+      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      write_buffer_4x4(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      break;
+    case FLIPADST_FLIPADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_4;
+      load_buffer_4x4(coeff, in);
+      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      write_buffer_4x4(in, output, stride, 1, 1, -cfg->shift[1], bd);
+      break;
+    case ADST_FLIPADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_4;
+      load_buffer_4x4(coeff, in);
+      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      write_buffer_4x4(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      break;
+    case FLIPADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_4;
+      load_buffer_4x4(coeff, in);
+      iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      write_buffer_4x4(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0);
+  }
+}
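+/* All 4x4 cases share one pipeline: load 32-bit coefficients, run the row
+ * transform, run the column transform, then round-shift, add to the
+ * prediction and clamp to [0, (1 << bd) - 1] in write_buffer_4x4(). The
+ * FLIPADST variants reuse the plain ADST kernels and apply the flip via
+ * the fliplr/flipud arguments instead. */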
+
+// 8x8
+static void load_buffer_8x8(const int32_t *coeff, __m128i *in) {
+  in[0] = _mm_load_si128((const __m128i *)(coeff + 0));
+  in[1] = _mm_load_si128((const __m128i *)(coeff + 4));
+  in[2] = _mm_load_si128((const __m128i *)(coeff + 8));
+  in[3] = _mm_load_si128((const __m128i *)(coeff + 12));
+  in[4] = _mm_load_si128((const __m128i *)(coeff + 16));
+  in[5] = _mm_load_si128((const __m128i *)(coeff + 20));
+  in[6] = _mm_load_si128((const __m128i *)(coeff + 24));
+  in[7] = _mm_load_si128((const __m128i *)(coeff + 28));
+  in[8] = _mm_load_si128((const __m128i *)(coeff + 32));
+  in[9] = _mm_load_si128((const __m128i *)(coeff + 36));
+  in[10] = _mm_load_si128((const __m128i *)(coeff + 40));
+  in[11] = _mm_load_si128((const __m128i *)(coeff + 44));
+  in[12] = _mm_load_si128((const __m128i *)(coeff + 48));
+  in[13] = _mm_load_si128((const __m128i *)(coeff + 52));
+  in[14] = _mm_load_si128((const __m128i *)(coeff + 56));
+  in[15] = _mm_load_si128((const __m128i *)(coeff + 60));
+}
+
+static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
+  const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+  const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+  const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+  const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+  const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+  const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+  const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+  const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+  const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+  const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+  __m128i u0, u1, u2, u3, u4, u5, u6, u7;
+  __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+  __m128i x, y;
+  int col;
+
+  // Note:
+  //  Even columns: 0, 2, ..., 14
+  //  Odd columns: 1, 3, ..., 15
+  //  One even column plus one odd column make up one row (8 coeffs);
+  //  in total there are 8 rows (8x8).
+  for (col = 0; col < 2; ++col) {
+    // stage 0
+    // stage 1
+    // stage 2
+    u0 = in[0 * 2 + col];
+    u1 = in[4 * 2 + col];
+    u2 = in[2 * 2 + col];
+    u3 = in[6 * 2 + col];
+
+    x = _mm_mullo_epi32(in[1 * 2 + col], cospi56);
+    y = _mm_mullo_epi32(in[7 * 2 + col], cospim8);
+    u4 = _mm_add_epi32(x, y);
+    u4 = _mm_add_epi32(u4, rnding);
+    u4 = _mm_srai_epi32(u4, bit);
+
+    x = _mm_mullo_epi32(in[1 * 2 + col], cospi8);
+    y = _mm_mullo_epi32(in[7 * 2 + col], cospi56);
+    u7 = _mm_add_epi32(x, y);
+    u7 = _mm_add_epi32(u7, rnding);
+    u7 = _mm_srai_epi32(u7, bit);
+
+    x = _mm_mullo_epi32(in[5 * 2 + col], cospi24);
+    y = _mm_mullo_epi32(in[3 * 2 + col], cospim40);
+    u5 = _mm_add_epi32(x, y);
+    u5 = _mm_add_epi32(u5, rnding);
+    u5 = _mm_srai_epi32(u5, bit);
+
+    x = _mm_mullo_epi32(in[5 * 2 + col], cospi40);
+    y = _mm_mullo_epi32(in[3 * 2 + col], cospi24);
+    u6 = _mm_add_epi32(x, y);
+    u6 = _mm_add_epi32(u6, rnding);
+    u6 = _mm_srai_epi32(u6, bit);
+
+    // stage 3
+    x = _mm_mullo_epi32(u0, cospi32);
+    y = _mm_mullo_epi32(u1, cospi32);
+    v0 = _mm_add_epi32(x, y);
+    v0 = _mm_add_epi32(v0, rnding);
+    v0 = _mm_srai_epi32(v0, bit);
+
+    v1 = _mm_sub_epi32(x, y);
+    v1 = _mm_add_epi32(v1, rnding);
+    v1 = _mm_srai_epi32(v1, bit);
+
+    x = _mm_mullo_epi32(u2, cospi48);
+    y = _mm_mullo_epi32(u3, cospim16);
+    v2 = _mm_add_epi32(x, y);
+    v2 = _mm_add_epi32(v2, rnding);
+    v2 = _mm_srai_epi32(v2, bit);
+
+    x = _mm_mullo_epi32(u2, cospi16);
+    y = _mm_mullo_epi32(u3, cospi48);
+    v3 = _mm_add_epi32(x, y);
+    v3 = _mm_add_epi32(v3, rnding);
+    v3 = _mm_srai_epi32(v3, bit);
+
+    v4 = _mm_add_epi32(u4, u5);
+    v5 = _mm_sub_epi32(u4, u5);
+    v6 = _mm_sub_epi32(u7, u6);
+    v7 = _mm_add_epi32(u6, u7);
+
+    // stage 4
+    u0 = _mm_add_epi32(v0, v3);
+    u1 = _mm_add_epi32(v1, v2);
+    u2 = _mm_sub_epi32(v1, v2);
+    u3 = _mm_sub_epi32(v0, v3);
+    u4 = v4;
+    u7 = v7;
+
+    x = _mm_mullo_epi32(v5, cospi32);
+    y = _mm_mullo_epi32(v6, cospi32);
+    u6 = _mm_add_epi32(y, x);
+    u6 = _mm_add_epi32(u6, rnding);
+    u6 = _mm_srai_epi32(u6, bit);
+
+    u5 = _mm_sub_epi32(y, x);
+    u5 = _mm_add_epi32(u5, rnding);
+    u5 = _mm_srai_epi32(u5, bit);
+
+    // stage 5
+    out[0 * 2 + col] = _mm_add_epi32(u0, u7);
+    out[1 * 2 + col] = _mm_add_epi32(u1, u6);
+    out[2 * 2 + col] = _mm_add_epi32(u2, u5);
+    out[3 * 2 + col] = _mm_add_epi32(u3, u4);
+    out[4 * 2 + col] = _mm_sub_epi32(u3, u4);
+    out[5 * 2 + col] = _mm_sub_epi32(u2, u5);
+    out[6 * 2 + col] = _mm_sub_epi32(u1, u6);
+    out[7 * 2 + col] = _mm_sub_epi32(u0, u7);
+  }
+}
+
+static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
+  const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+  const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+  const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+  const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+  const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+  const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
+  const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
+  const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
+  const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
+  const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
+  const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
+  const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
+  const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
+  const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
+  const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
+  const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
+  const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
+  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+  const __m128i zero = _mm_setzero_si128();
+  __m128i u0, u1, u2, u3, u4, u5, u6, u7;
+  __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+  __m128i x, y;
+  int col;
+
+  // Note:
+  //  Even columns: 0, 2, ..., 14
+  //  Odd columns: 1, 3, ..., 15
+  //  One even column plus one odd column make up one row (8 coeffs);
+  //  in total there are 8 rows (8x8).
+  for (col = 0; col < 2; ++col) {
+    // stage 0
+    // stage 1
+    u0 = in[2 * 0 + col];
+    u1 = _mm_sub_epi32(zero, in[2 * 7 + col]);
+    u2 = _mm_sub_epi32(zero, in[2 * 3 + col]);
+    u3 = in[2 * 4 + col];
+    u4 = _mm_sub_epi32(zero, in[2 * 1 + col]);
+    u5 = in[2 * 6 + col];
+    u6 = in[2 * 2 + col];
+    u7 = _mm_sub_epi32(zero, in[2 * 5 + col]);
+
+    // stage 2
+    v0 = u0;
+    v1 = u1;
+
+    x = _mm_mullo_epi32(u2, cospi32);
+    y = _mm_mullo_epi32(u3, cospi32);
+    v2 = _mm_add_epi32(x, y);
+    v2 = _mm_add_epi32(v2, rnding);
+    v2 = _mm_srai_epi32(v2, bit);
+
+    v3 = _mm_sub_epi32(x, y);
+    v3 = _mm_add_epi32(v3, rnding);
+    v3 = _mm_srai_epi32(v3, bit);
+
+    v4 = u4;
+    v5 = u5;
+
+    x = _mm_mullo_epi32(u6, cospi32);
+    y = _mm_mullo_epi32(u7, cospi32);
+    v6 = _mm_add_epi32(x, y);
+    v6 = _mm_add_epi32(v6, rnding);
+    v6 = _mm_srai_epi32(v6, bit);
+
+    v7 = _mm_sub_epi32(x, y);
+    v7 = _mm_add_epi32(v7, rnding);
+    v7 = _mm_srai_epi32(v7, bit);
+
+    // stage 3
+    u0 = _mm_add_epi32(v0, v2);
+    u1 = _mm_add_epi32(v1, v3);
+    u2 = _mm_sub_epi32(v0, v2);
+    u3 = _mm_sub_epi32(v1, v3);
+    u4 = _mm_add_epi32(v4, v6);
+    u5 = _mm_add_epi32(v5, v7);
+    u6 = _mm_sub_epi32(v4, v6);
+    u7 = _mm_sub_epi32(v5, v7);
+
+    // stage 4
+    v0 = u0;
+    v1 = u1;
+    v2 = u2;
+    v3 = u3;
+
+    x = _mm_mullo_epi32(u4, cospi16);
+    y = _mm_mullo_epi32(u5, cospi48);
+    v4 = _mm_add_epi32(x, y);
+    v4 = _mm_add_epi32(v4, rnding);
+    v4 = _mm_srai_epi32(v4, bit);
+
+    x = _mm_mullo_epi32(u4, cospi48);
+    y = _mm_mullo_epi32(u5, cospim16);
+    v5 = _mm_add_epi32(x, y);
+    v5 = _mm_add_epi32(v5, rnding);
+    v5 = _mm_srai_epi32(v5, bit);
+
+    x = _mm_mullo_epi32(u6, cospim48);
+    y = _mm_mullo_epi32(u7, cospi16);
+    v6 = _mm_add_epi32(x, y);
+    v6 = _mm_add_epi32(v6, rnding);
+    v6 = _mm_srai_epi32(v6, bit);
+
+    x = _mm_mullo_epi32(u6, cospi16);
+    y = _mm_mullo_epi32(u7, cospi48);
+    v7 = _mm_add_epi32(x, y);
+    v7 = _mm_add_epi32(v7, rnding);
+    v7 = _mm_srai_epi32(v7, bit);
+
+    // stage 5
+    u0 = _mm_add_epi32(v0, v4);
+    u1 = _mm_add_epi32(v1, v5);
+    u2 = _mm_add_epi32(v2, v6);
+    u3 = _mm_add_epi32(v3, v7);
+    u4 = _mm_sub_epi32(v0, v4);
+    u5 = _mm_sub_epi32(v1, v5);
+    u6 = _mm_sub_epi32(v2, v6);
+    u7 = _mm_sub_epi32(v3, v7);
+
+    // stage 6
+    x = _mm_mullo_epi32(u0, cospi4);
+    y = _mm_mullo_epi32(u1, cospi60);
+    v0 = _mm_add_epi32(x, y);
+    v0 = _mm_add_epi32(v0, rnding);
+    v0 = _mm_srai_epi32(v0, bit);
+
+    x = _mm_mullo_epi32(u0, cospi60);
+    y = _mm_mullo_epi32(u1, cospim4);
+    v1 = _mm_add_epi32(x, y);
+    v1 = _mm_add_epi32(v1, rnding);
+    v1 = _mm_srai_epi32(v1, bit);
+
+    x = _mm_mullo_epi32(u2, cospi20);
+    y = _mm_mullo_epi32(u3, cospi44);
+    v2 = _mm_add_epi32(x, y);
+    v2 = _mm_add_epi32(v2, rnding);
+    v2 = _mm_srai_epi32(v2, bit);
+
+    x = _mm_mullo_epi32(u2, cospi44);
+    y = _mm_mullo_epi32(u3, cospim20);
+    v3 = _mm_add_epi32(x, y);
+    v3 = _mm_add_epi32(v3, rnding);
+    v3 = _mm_srai_epi32(v3, bit);
+
+    x = _mm_mullo_epi32(u4, cospi36);
+    y = _mm_mullo_epi32(u5, cospi28);
+    v4 = _mm_add_epi32(x, y);
+    v4 = _mm_add_epi32(v4, rnding);
+    v4 = _mm_srai_epi32(v4, bit);
+
+    x = _mm_mullo_epi32(u4, cospi28);
+    y = _mm_mullo_epi32(u5, cospim36);
+    v5 = _mm_add_epi32(x, y);
+    v5 = _mm_add_epi32(v5, rnding);
+    v5 = _mm_srai_epi32(v5, bit);
+
+    x = _mm_mullo_epi32(u6, cospi52);
+    y = _mm_mullo_epi32(u7, cospi12);
+    v6 = _mm_add_epi32(x, y);
+    v6 = _mm_add_epi32(v6, rnding);
+    v6 = _mm_srai_epi32(v6, bit);
+
+    x = _mm_mullo_epi32(u6, cospi12);
+    y = _mm_mullo_epi32(u7, cospim52);
+    v7 = _mm_add_epi32(x, y);
+    v7 = _mm_add_epi32(v7, rnding);
+    v7 = _mm_srai_epi32(v7, bit);
+
+    // stage 7
+    out[2 * 0 + col] = v1;
+    out[2 * 1 + col] = v6;
+    out[2 * 2 + col] = v3;
+    out[2 * 3 + col] = v4;
+    out[2 * 4 + col] = v5;
+    out[2 * 5 + col] = v2;
+    out[2 * 6 + col] = v7;
+    out[2 * 7 + col] = v0;
+  }
+}
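+/* The stage-1 negations and the stage-7 output permutation together
+ * realize the sign and reordering pattern of this 8-point ADST
+ * factorization, so no extra multiplies are needed beyond the butterfly
+ * stages. */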
+
+static void round_shift_8x8(__m128i *in, int shift) {
+  round_shift_4x4(&in[0], shift);
+  round_shift_4x4(&in[4], shift);
+  round_shift_4x4(&in[8], shift);
+  round_shift_4x4(&in[12], shift);
+}
+
+static __m128i get_recon_8x8(const __m128i pred, __m128i res_lo, __m128i res_hi,
+                             int fliplr, int bd) {
+  __m128i x0, x1;
+  const __m128i zero = _mm_setzero_si128();
+
+  x0 = _mm_unpacklo_epi16(pred, zero);
+  x1 = _mm_unpackhi_epi16(pred, zero);
+
+  if (fliplr) {
+    res_lo = _mm_shuffle_epi32(res_lo, 0x1B);
+    res_hi = _mm_shuffle_epi32(res_hi, 0x1B);
+    x0 = _mm_add_epi32(res_hi, x0);
+    x1 = _mm_add_epi32(res_lo, x1);
+
+  } else {
+    x0 = _mm_add_epi32(res_lo, x0);
+    x1 = _mm_add_epi32(res_hi, x1);
+  }
+
+  x0 = _mm_packus_epi32(x0, x1);
+  return highbd_clamp_epi16(x0, bd);
+}
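+/* get_recon_8x8() widens one row of 16-bit prediction pixels to 32 bits,
+ * adds the (optionally left-right flipped) residual halves, then packs
+ * and clamps back to the working bit depth. Per pixel (illustration):
+ *
+ *   recon[j] = clamp(pred[j] + res[j], 0, (1 << bd) - 1);
+ */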
+
+static void write_buffer_8x8(__m128i *in, uint16_t *output, int stride,
+                             int fliplr, int flipud, int shift, int bd) {
+  __m128i u0, u1, u2, u3, u4, u5, u6, u7;
+  __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+
+  round_shift_8x8(in, shift);
+
+  v0 = _mm_load_si128((__m128i const *)(output + 0 * stride));
+  v1 = _mm_load_si128((__m128i const *)(output + 1 * stride));
+  v2 = _mm_load_si128((__m128i const *)(output + 2 * stride));
+  v3 = _mm_load_si128((__m128i const *)(output + 3 * stride));
+  v4 = _mm_load_si128((__m128i const *)(output + 4 * stride));
+  v5 = _mm_load_si128((__m128i const *)(output + 5 * stride));
+  v6 = _mm_load_si128((__m128i const *)(output + 6 * stride));
+  v7 = _mm_load_si128((__m128i const *)(output + 7 * stride));
+
+  if (flipud) {
+    u0 = get_recon_8x8(v0, in[14], in[15], fliplr, bd);
+    u1 = get_recon_8x8(v1, in[12], in[13], fliplr, bd);
+    u2 = get_recon_8x8(v2, in[10], in[11], fliplr, bd);
+    u3 = get_recon_8x8(v3, in[8], in[9], fliplr, bd);
+    u4 = get_recon_8x8(v4, in[6], in[7], fliplr, bd);
+    u5 = get_recon_8x8(v5, in[4], in[5], fliplr, bd);
+    u6 = get_recon_8x8(v6, in[2], in[3], fliplr, bd);
+    u7 = get_recon_8x8(v7, in[0], in[1], fliplr, bd);
+  } else {
+    u0 = get_recon_8x8(v0, in[0], in[1], fliplr, bd);
+    u1 = get_recon_8x8(v1, in[2], in[3], fliplr, bd);
+    u2 = get_recon_8x8(v2, in[4], in[5], fliplr, bd);
+    u3 = get_recon_8x8(v3, in[6], in[7], fliplr, bd);
+    u4 = get_recon_8x8(v4, in[8], in[9], fliplr, bd);
+    u5 = get_recon_8x8(v5, in[10], in[11], fliplr, bd);
+    u6 = get_recon_8x8(v6, in[12], in[13], fliplr, bd);
+    u7 = get_recon_8x8(v7, in[14], in[15], fliplr, bd);
+  }
+
+  _mm_store_si128((__m128i *)(output + 0 * stride), u0);
+  _mm_store_si128((__m128i *)(output + 1 * stride), u1);
+  _mm_store_si128((__m128i *)(output + 2 * stride), u2);
+  _mm_store_si128((__m128i *)(output + 3 * stride), u3);
+  _mm_store_si128((__m128i *)(output + 4 * stride), u4);
+  _mm_store_si128((__m128i *)(output + 5 * stride), u5);
+  _mm_store_si128((__m128i *)(output + 6 * stride), u6);
+  _mm_store_si128((__m128i *)(output + 7 * stride), u7);
+}
+
+void vp10_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
+                                    int stride, int tx_type, int bd) {
+  __m128i in[16], out[16];
+  const TXFM_2D_CFG *cfg = NULL;
+
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_8;
+      load_buffer_8x8(coeff, in);
+      transpose_8x8(in, out);
+      idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      transpose_8x8(in, out);
+      idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+    case DCT_ADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_8;
+      load_buffer_8x8(coeff, in);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      transpose_8x8(in, out);
+      idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+    case ADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_8;
+      load_buffer_8x8(coeff, in);
+      transpose_8x8(in, out);
+      idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+    case ADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_8;
+      load_buffer_8x8(coeff, in);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_8;
+      load_buffer_8x8(coeff, in);
+      transpose_8x8(in, out);
+      idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_8x8(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      break;
+    case DCT_FLIPADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_8;
+      load_buffer_8x8(coeff, in);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      transpose_8x8(in, out);
+      idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_8x8(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      break;
+    case ADST_FLIPADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_8;
+      load_buffer_8x8(coeff, in);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_8x8(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      break;
+    case FLIPADST_FLIPADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_8;
+      load_buffer_8x8(coeff, in);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_8x8(in, output, stride, 1, 1, -cfg->shift[1], bd);
+      break;
+    case FLIPADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_8;
+      load_buffer_8x8(coeff, in);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+      transpose_8x8(in, out);
+      iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_8x8(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0);
+  }
+}
+
+// 16x16
+static void load_buffer_16x16(const int32_t *coeff, __m128i *in) {
+  int i;
+  for (i = 0; i < 64; ++i) {
+    in[i] = _mm_load_si128((const __m128i *)(coeff + (i << 2)));
+  }
+}
+
+static void assign_8x8_input_from_16x16(const __m128i *in, __m128i *in8x8,
+                                        int col) {
+  int i;
+  for (i = 0; i < 16; i += 2) {
+    in8x8[i] = in[col];
+    in8x8[i + 1] = in[col + 1];
+    col += 4;
+  }
+}
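+/* Each 16-wide row of the 16x16 block spans four __m128i registers, so an
+ * 8x8 quarter takes two of them per row. Start columns 0, 2, 32 and 34
+ * therefore select the left-up, right-up, left-down and right-down
+ * quarters consumed by write_buffer_16x16() below. */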
+
+static void swap_addr(uint16_t **output1, uint16_t **output2) {
+  uint16_t *tmp;
+  tmp = *output1;
+  *output1 = *output2;
+  *output2 = tmp;
+}
+
+static void write_buffer_16x16(__m128i *in, uint16_t *output, int stride,
+                               int fliplr, int flipud, int shift, int bd) {
+  __m128i in8x8[16];
+  uint16_t *leftUp = &output[0];
+  uint16_t *rightUp = &output[8];
+  uint16_t *leftDown = &output[8 * stride];
+  uint16_t *rightDown = &output[8 * stride + 8];
+
+  if (fliplr) {
+    swap_addr(&leftUp, &rightUp);
+    swap_addr(&leftDown, &rightDown);
+  }
+
+  if (flipud) {
+    swap_addr(&leftUp, &leftDown);
+    swap_addr(&rightUp, &rightDown);
+  }
+
+  // Left-up quarter
+  assign_8x8_input_from_16x16(in, in8x8, 0);
+  write_buffer_8x8(in8x8, leftUp, stride, fliplr, flipud, shift, bd);
+
+  // Right-up quarter
+  assign_8x8_input_from_16x16(in, in8x8, 2);
+  write_buffer_8x8(in8x8, rightUp, stride, fliplr, flipud, shift, bd);
+
+  // Left-down quarter
+  assign_8x8_input_from_16x16(in, in8x8, 32);
+  write_buffer_8x8(in8x8, leftDown, stride, fliplr, flipud, shift, bd);
+
+  // Right-down quarter
+  assign_8x8_input_from_16x16(in, in8x8, 34);
+  write_buffer_8x8(in8x8, rightDown, stride, fliplr, flipud, shift, bd);
+}
+
+static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
+  const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+  const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
+  const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
+  const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
+  const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
+  const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
+  const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
+  const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
+  const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
+  const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
+  const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
+  const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
+  const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
+  const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+  const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+  const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+  const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+  const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+  const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+  const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+  const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+  const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+  const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+  __m128i u[16], v[16], x, y;
+  int col;
+
+  for (col = 0; col < 4; ++col) {
+    // stage 0
+    // stage 1
+    u[0] = in[0 * 4 + col];
+    u[1] = in[8 * 4 + col];
+    u[2] = in[4 * 4 + col];
+    u[3] = in[12 * 4 + col];
+    u[4] = in[2 * 4 + col];
+    u[5] = in[10 * 4 + col];
+    u[6] = in[6 * 4 + col];
+    u[7] = in[14 * 4 + col];
+    u[8] = in[1 * 4 + col];
+    u[9] = in[9 * 4 + col];
+    u[10] = in[5 * 4 + col];
+    u[11] = in[13 * 4 + col];
+    u[12] = in[3 * 4 + col];
+    u[13] = in[11 * 4 + col];
+    u[14] = in[7 * 4 + col];
+    u[15] = in[15 * 4 + col];
+
+    // stage 2
+    v[0] = u[0];
+    v[1] = u[1];
+    v[2] = u[2];
+    v[3] = u[3];
+    v[4] = u[4];
+    v[5] = u[5];
+    v[6] = u[6];
+    v[7] = u[7];
+
+    v[8] = half_btf_sse4_1(cospi60, u[8], cospim4, u[15], rnding, bit);
+    v[9] = half_btf_sse4_1(cospi28, u[9], cospim36, u[14], rnding, bit);
+    v[10] = half_btf_sse4_1(cospi44, u[10], cospim20, u[13], rnding, bit);
+    v[11] = half_btf_sse4_1(cospi12, u[11], cospim52, u[12], rnding, bit);
+    v[12] = half_btf_sse4_1(cospi52, u[11], cospi12, u[12], rnding, bit);
+    v[13] = half_btf_sse4_1(cospi20, u[10], cospi44, u[13], rnding, bit);
+    v[14] = half_btf_sse4_1(cospi36, u[9], cospi28, u[14], rnding, bit);
+    v[15] = half_btf_sse4_1(cospi4, u[8], cospi60, u[15], rnding, bit);
+
+    // stage 3
+    u[0] = v[0];
+    u[1] = v[1];
+    u[2] = v[2];
+    u[3] = v[3];
+    u[4] = half_btf_sse4_1(cospi56, v[4], cospim8, v[7], rnding, bit);
+    u[5] = half_btf_sse4_1(cospi24, v[5], cospim40, v[6], rnding, bit);
+    u[6] = half_btf_sse4_1(cospi40, v[5], cospi24, v[6], rnding, bit);
+    u[7] = half_btf_sse4_1(cospi8, v[4], cospi56, v[7], rnding, bit);
+    u[8] = _mm_add_epi32(v[8], v[9]);
+    u[9] = _mm_sub_epi32(v[8], v[9]);
+    u[10] = _mm_sub_epi32(v[11], v[10]);
+    u[11] = _mm_add_epi32(v[10], v[11]);
+    u[12] = _mm_add_epi32(v[12], v[13]);
+    u[13] = _mm_sub_epi32(v[12], v[13]);
+    u[14] = _mm_sub_epi32(v[15], v[14]);
+    u[15] = _mm_add_epi32(v[14], v[15]);
+
+    // stage 4
+    x = _mm_mullo_epi32(u[0], cospi32);
+    y = _mm_mullo_epi32(u[1], cospi32);
+    v[0] = _mm_add_epi32(x, y);
+    v[0] = _mm_add_epi32(v[0], rnding);
+    v[0] = _mm_srai_epi32(v[0], bit);
+
+    v[1] = _mm_sub_epi32(x, y);
+    v[1] = _mm_add_epi32(v[1], rnding);
+    v[1] = _mm_srai_epi32(v[1], bit);
+
+    v[2] = half_btf_sse4_1(cospi48, u[2], cospim16, u[3], rnding, bit);
+    v[3] = half_btf_sse4_1(cospi16, u[2], cospi48, u[3], rnding, bit);
+    v[4] = _mm_add_epi32(u[4], u[5]);
+    v[5] = _mm_sub_epi32(u[4], u[5]);
+    v[6] = _mm_sub_epi32(u[7], u[6]);
+    v[7] = _mm_add_epi32(u[6], u[7]);
+    v[8] = u[8];
+    v[9] = half_btf_sse4_1(cospim16, u[9], cospi48, u[14], rnding, bit);
+    v[10] = half_btf_sse4_1(cospim48, u[10], cospim16, u[13], rnding, bit);
+    v[11] = u[11];
+    v[12] = u[12];
+    v[13] = half_btf_sse4_1(cospim16, u[10], cospi48, u[13], rnding, bit);
+    v[14] = half_btf_sse4_1(cospi48, u[9], cospi16, u[14], rnding, bit);
+    v[15] = u[15];
+
+    // stage 5
+    u[0] = _mm_add_epi32(v[0], v[3]);
+    u[1] = _mm_add_epi32(v[1], v[2]);
+    u[2] = _mm_sub_epi32(v[1], v[2]);
+    u[3] = _mm_sub_epi32(v[0], v[3]);
+    u[4] = v[4];
+
+    x = _mm_mullo_epi32(v[5], cospi32);
+    y = _mm_mullo_epi32(v[6], cospi32);
+    u[5] = _mm_sub_epi32(y, x);
+    u[5] = _mm_add_epi32(u[5], rnding);
+    u[5] = _mm_srai_epi32(u[5], bit);
+
+    u[6] = _mm_add_epi32(y, x);
+    u[6] = _mm_add_epi32(u[6], rnding);
+    u[6] = _mm_srai_epi32(u[6], bit);
+
+    u[7] = v[7];
+    u[8] = _mm_add_epi32(v[8], v[11]);
+    u[9] = _mm_add_epi32(v[9], v[10]);
+    u[10] = _mm_sub_epi32(v[9], v[10]);
+    u[11] = _mm_sub_epi32(v[8], v[11]);
+    u[12] = _mm_sub_epi32(v[15], v[12]);
+    u[13] = _mm_sub_epi32(v[14], v[13]);
+    u[14] = _mm_add_epi32(v[13], v[14]);
+    u[15] = _mm_add_epi32(v[12], v[15]);
+
+    // stage 6
+    v[0] = _mm_add_epi32(u[0], u[7]);
+    v[1] = _mm_add_epi32(u[1], u[6]);
+    v[2] = _mm_add_epi32(u[2], u[5]);
+    v[3] = _mm_add_epi32(u[3], u[4]);
+    v[4] = _mm_sub_epi32(u[3], u[4]);
+    v[5] = _mm_sub_epi32(u[2], u[5]);
+    v[6] = _mm_sub_epi32(u[1], u[6]);
+    v[7] = _mm_sub_epi32(u[0], u[7]);
+    v[8] = u[8];
+    v[9] = u[9];
+
+    x = _mm_mullo_epi32(u[10], cospi32);
+    y = _mm_mullo_epi32(u[13], cospi32);
+    v[10] = _mm_sub_epi32(y, x);
+    v[10] = _mm_add_epi32(v[10], rnding);
+    v[10] = _mm_srai_epi32(v[10], bit);
+
+    v[13] = _mm_add_epi32(x, y);
+    v[13] = _mm_add_epi32(v[13], rnding);
+    v[13] = _mm_srai_epi32(v[13], bit);
+
+    x = _mm_mullo_epi32(u[11], cospi32);
+    y = _mm_mullo_epi32(u[12], cospi32);
+    v[11] = _mm_sub_epi32(y, x);
+    v[11] = _mm_add_epi32(v[11], rnding);
+    v[11] = _mm_srai_epi32(v[11], bit);
+
+    v[12] = _mm_add_epi32(x, y);
+    v[12] = _mm_add_epi32(v[12], rnding);
+    v[12] = _mm_srai_epi32(v[12], bit);
+
+    v[14] = u[14];
+    v[15] = u[15];
+
+    // stage 7
+    out[0 * 4 + col] = _mm_add_epi32(v[0], v[15]);
+    out[1 * 4 + col] = _mm_add_epi32(v[1], v[14]);
+    out[2 * 4 + col] = _mm_add_epi32(v[2], v[13]);
+    out[3 * 4 + col] = _mm_add_epi32(v[3], v[12]);
+    out[4 * 4 + col] = _mm_add_epi32(v[4], v[11]);
+    out[5 * 4 + col] = _mm_add_epi32(v[5], v[10]);
+    out[6 * 4 + col] = _mm_add_epi32(v[6], v[9]);
+    out[7 * 4 + col] = _mm_add_epi32(v[7], v[8]);
+    out[8 * 4 + col] = _mm_sub_epi32(v[7], v[8]);
+    out[9 * 4 + col] = _mm_sub_epi32(v[6], v[9]);
+    out[10 * 4 + col] = _mm_sub_epi32(v[5], v[10]);
+    out[11 * 4 + col] = _mm_sub_epi32(v[4], v[11]);
+    out[12 * 4 + col] = _mm_sub_epi32(v[3], v[12]);
+    out[13 * 4 + col] = _mm_sub_epi32(v[2], v[13]);
+    out[14 * 4 + col] = _mm_sub_epi32(v[1], v[14]);
+    out[15 * 4 + col] = _mm_sub_epi32(v[0], v[15]);
+  }
+}
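+/* idct16x16 follows the same butterfly schedule as idct8x8 above, with
+ * the rotation pairs factored into the half_btf_sse4_1() helper from
+ * highbd_txfm_utility_sse4.h; each call computes
+ * (w0 * in0 + w1 * in1 + rounding) >> bit on four 32-bit lanes at once,
+ * the same pattern written out long-hand in idct8x8_sse4_1(). */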
+
+static void iadst16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
+  const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+  const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+  const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+  const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+  const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+  const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+  const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+  const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
+  const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+  const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+  const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
+  const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+  const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+  const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
+  const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
+  const __m128i cospim2 = _mm_set1_epi32(-cospi[2]);
+  const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
+  const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
+  const __m128i cospim10 = _mm_set1_epi32(-cospi[10]);
+  const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
+  const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
+  const __m128i cospim18 = _mm_set1_epi32(-cospi[18]);
+  const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
+  const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
+  const __m128i cospim26 = _mm_set1_epi32(-cospi[26]);
+  const __m128i cospi34 = _mm_set1_epi32(cospi[34]);
+  const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
+  const __m128i cospim34 = _mm_set1_epi32(-cospi[34]);
+  const __m128i cospi42 = _mm_set1_epi32(cospi[42]);
+  const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
+  const __m128i cospim42 = _mm_set1_epi32(-cospi[42]);
+  const __m128i cospi50 = _mm_set1_epi32(cospi[50]);
+  const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
+  const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
+  const __m128i cospi58 = _mm_set1_epi32(cospi[58]);
+  const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
+  const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);
+  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+  const __m128i zero = _mm_setzero_si128();
+
+  __m128i u[16], v[16], x, y;
+  int col;
+
+  for (col = 0; col < 4; ++col) {
+    // stage 0
+    // stage 1
+    u[0] = in[0 * 4 + col];
+    u[1] = _mm_sub_epi32(zero, in[15 * 4 + col]);
+    u[2] = _mm_sub_epi32(zero, in[7 * 4 + col]);
+    u[3] = in[8 * 4 + col];
+    u[4] = _mm_sub_epi32(zero, in[3 * 4 + col]);
+    u[5] = in[12 * 4 + col];
+    u[6] = in[4 * 4 + col];
+    u[7] = _mm_sub_epi32(zero, in[11 * 4 + col]);
+    u[8] = _mm_sub_epi32(zero, in[1 * 4 + col]);
+    u[9] = in[14 * 4 + col];
+    u[10] = in[6 * 4 + col];
+    u[11] = _mm_sub_epi32(zero, in[9 * 4 + col]);
+    u[12] = in[2 * 4 + col];
+    u[13] = _mm_sub_epi32(zero, in[13 * 4 + col]);
+    u[14] = _mm_sub_epi32(zero, in[5 * 4 + col]);
+    u[15] = in[10 * 4 + col];
+
+    // stage 2
+    v[0] = u[0];
+    v[1] = u[1];
+
+    x = _mm_mullo_epi32(u[2], cospi32);
+    y = _mm_mullo_epi32(u[3], cospi32);
+    v[2] = _mm_add_epi32(x, y);
+    v[2] = _mm_add_epi32(v[2], rnding);
+    v[2] = _mm_srai_epi32(v[2], bit);
+
+    v[3] = _mm_sub_epi32(x, y);
+    v[3] = _mm_add_epi32(v[3], rnding);
+    v[3] = _mm_srai_epi32(v[3], bit);
+
+    v[4] = u[4];
+    v[5] = u[5];
+
+    x = _mm_mullo_epi32(u[6], cospi32);
+    y = _mm_mullo_epi32(u[7], cospi32);
+    v[6] = _mm_add_epi32(x, y);
+    v[6] = _mm_add_epi32(v[6], rnding);
+    v[6] = _mm_srai_epi32(v[6], bit);
+
+    v[7] = _mm_sub_epi32(x, y);
+    v[7] = _mm_add_epi32(v[7], rnding);
+    v[7] = _mm_srai_epi32(v[7], bit);
+
+    v[8] = u[8];
+    v[9] = u[9];
+
+    x = _mm_mullo_epi32(u[10], cospi32);
+    y = _mm_mullo_epi32(u[11], cospi32);
+    v[10] = _mm_add_epi32(x, y);
+    v[10] = _mm_add_epi32(v[10], rnding);
+    v[10] = _mm_srai_epi32(v[10], bit);
+
+    v[11] = _mm_sub_epi32(x, y);
+    v[11] = _mm_add_epi32(v[11], rnding);
+    v[11] = _mm_srai_epi32(v[11], bit);
+
+    v[12] = u[12];
+    v[13] = u[13];
+
+    x = _mm_mullo_epi32(u[14], cospi32);
+    y = _mm_mullo_epi32(u[15], cospi32);
+    v[14] = _mm_add_epi32(x, y);
+    v[14] = _mm_add_epi32(v[14], rnding);
+    v[14] = _mm_srai_epi32(v[14], bit);
+
+    v[15] = _mm_sub_epi32(x, y);
+    v[15] = _mm_add_epi32(v[15], rnding);
+    v[15] = _mm_srai_epi32(v[15], bit);
+
+    // stage 3
+    u[0] = _mm_add_epi32(v[0], v[2]);
+    u[1] = _mm_add_epi32(v[1], v[3]);
+    u[2] = _mm_sub_epi32(v[0], v[2]);
+    u[3] = _mm_sub_epi32(v[1], v[3]);
+    u[4] = _mm_add_epi32(v[4], v[6]);
+    u[5] = _mm_add_epi32(v[5], v[7]);
+    u[6] = _mm_sub_epi32(v[4], v[6]);
+    u[7] = _mm_sub_epi32(v[5], v[7]);
+    u[8] = _mm_add_epi32(v[8], v[10]);
+    u[9] = _mm_add_epi32(v[9], v[11]);
+    u[10] = _mm_sub_epi32(v[8], v[10]);
+    u[11] = _mm_sub_epi32(v[9], v[11]);
+    u[12] = _mm_add_epi32(v[12], v[14]);
+    u[13] = _mm_add_epi32(v[13], v[15]);
+    u[14] = _mm_sub_epi32(v[12], v[14]);
+    u[15] = _mm_sub_epi32(v[13], v[15]);
+
+    // stage 4
+    v[0] = u[0];
+    v[1] = u[1];
+    v[2] = u[2];
+    v[3] = u[3];
+    v[4] = half_btf_sse4_1(cospi16, u[4], cospi48, u[5], rnding, bit);
+    v[5] = half_btf_sse4_1(cospi48, u[4], cospim16, u[5], rnding, bit);
+    v[6] = half_btf_sse4_1(cospim48, u[6], cospi16, u[7], rnding, bit);
+    v[7] = half_btf_sse4_1(cospi16, u[6], cospi48, u[7], rnding, bit);
+    v[8] = u[8];
+    v[9] = u[9];
+    v[10] = u[10];
+    v[11] = u[11];
+    v[12] = half_btf_sse4_1(cospi16, u[12], cospi48, u[13], rnding, bit);
+    v[13] = half_btf_sse4_1(cospi48, u[12], cospim16, u[13], rnding, bit);
+    v[14] = half_btf_sse4_1(cospim48, u[14], cospi16, u[15], rnding, bit);
+    v[15] = half_btf_sse4_1(cospi16, u[14], cospi48, u[15], rnding, bit);
+
+    // stage 5
+    u[0] = _mm_add_epi32(v[0], v[4]);
+    u[1] = _mm_add_epi32(v[1], v[5]);
+    u[2] = _mm_add_epi32(v[2], v[6]);
+    u[3] = _mm_add_epi32(v[3], v[7]);
+    u[4] = _mm_sub_epi32(v[0], v[4]);
+    u[5] = _mm_sub_epi32(v[1], v[5]);
+    u[6] = _mm_sub_epi32(v[2], v[6]);
+    u[7] = _mm_sub_epi32(v[3], v[7]);
+    u[8] = _mm_add_epi32(v[8], v[12]);
+    u[9] = _mm_add_epi32(v[9], v[13]);
+    u[10] = _mm_add_epi32(v[10], v[14]);
+    u[11] = _mm_add_epi32(v[11], v[15]);
+    u[12] = _mm_sub_epi32(v[8], v[12]);
+    u[13] = _mm_sub_epi32(v[9], v[13]);
+    u[14] = _mm_sub_epi32(v[10], v[14]);
+    u[15] = _mm_sub_epi32(v[11], v[15]);
+
+    // stage 6
+    v[0] = u[0];
+    v[1] = u[1];
+    v[2] = u[2];
+    v[3] = u[3];
+    v[4] = u[4];
+    v[5] = u[5];
+    v[6] = u[6];
+    v[7] = u[7];
+    v[8] = half_btf_sse4_1(cospi8, u[8], cospi56, u[9], rnding, bit);
+    v[9] = half_btf_sse4_1(cospi56, u[8], cospim8, u[9], rnding, bit);
+    v[10] = half_btf_sse4_1(cospi40, u[10], cospi24, u[11], rnding, bit);
+    v[11] = half_btf_sse4_1(cospi24, u[10], cospim40, u[11], rnding, bit);
+    v[12] = half_btf_sse4_1(cospim56, u[12], cospi8, u[13], rnding, bit);
+    v[13] = half_btf_sse4_1(cospi8, u[12], cospi56, u[13], rnding, bit);
+    v[14] = half_btf_sse4_1(cospim24, u[14], cospi40, u[15], rnding, bit);
+    v[15] = half_btf_sse4_1(cospi40, u[14], cospi24, u[15], rnding, bit);
+
+    // stage 7
+    u[0] = _mm_add_epi32(v[0], v[8]);
+    u[1] = _mm_add_epi32(v[1], v[9]);
+    u[2] = _mm_add_epi32(v[2], v[10]);
+    u[3] = _mm_add_epi32(v[3], v[11]);
+    u[4] = _mm_add_epi32(v[4], v[12]);
+    u[5] = _mm_add_epi32(v[5], v[13]);
+    u[6] = _mm_add_epi32(v[6], v[14]);
+    u[7] = _mm_add_epi32(v[7], v[15]);
+    u[8] = _mm_sub_epi32(v[0], v[8]);
+    u[9] = _mm_sub_epi32(v[1], v[9]);
+    u[10] = _mm_sub_epi32(v[2], v[10]);
+    u[11] = _mm_sub_epi32(v[3], v[11]);
+    u[12] = _mm_sub_epi32(v[4], v[12]);
+    u[13] = _mm_sub_epi32(v[5], v[13]);
+    u[14] = _mm_sub_epi32(v[6], v[14]);
+    u[15] = _mm_sub_epi32(v[7], v[15]);
+
+    // stage 8
+    v[0] = half_btf_sse4_1(cospi2, u[0], cospi62, u[1], rnding, bit);
+    v[1] = half_btf_sse4_1(cospi62, u[0], cospim2, u[1], rnding, bit);
+    v[2] = half_btf_sse4_1(cospi10, u[2], cospi54, u[3], rnding, bit);
+    v[3] = half_btf_sse4_1(cospi54, u[2], cospim10, u[3], rnding, bit);
+    v[4] = half_btf_sse4_1(cospi18, u[4], cospi46, u[5], rnding, bit);
+    v[5] = half_btf_sse4_1(cospi46, u[4], cospim18, u[5], rnding, bit);
+    v[6] = half_btf_sse4_1(cospi26, u[6], cospi38, u[7], rnding, bit);
+    v[7] = half_btf_sse4_1(cospi38, u[6], cospim26, u[7], rnding, bit);
+    v[8] = half_btf_sse4_1(cospi34, u[8], cospi30, u[9], rnding, bit);
+    v[9] = half_btf_sse4_1(cospi30, u[8], cospim34, u[9], rnding, bit);
+    v[10] = half_btf_sse4_1(cospi42, u[10], cospi22, u[11], rnding, bit);
+    v[11] = half_btf_sse4_1(cospi22, u[10], cospim42, u[11], rnding, bit);
+    v[12] = half_btf_sse4_1(cospi50, u[12], cospi14, u[13], rnding, bit);
+    v[13] = half_btf_sse4_1(cospi14, u[12], cospim50, u[13], rnding, bit);
+    v[14] = half_btf_sse4_1(cospi58, u[14], cospi6, u[15], rnding, bit);
+    v[15] = half_btf_sse4_1(cospi6, u[14], cospim58, u[15], rnding, bit);
+
+    // stage 9
+    out[0 * 4 + col] = v[1];
+    out[1 * 4 + col] = v[14];
+    out[2 * 4 + col] = v[3];
+    out[3 * 4 + col] = v[12];
+    out[4 * 4 + col] = v[5];
+    out[5 * 4 + col] = v[10];
+    out[6 * 4 + col] = v[7];
+    out[7 * 4 + col] = v[8];
+    out[8 * 4 + col] = v[9];
+    out[9 * 4 + col] = v[6];
+    out[10 * 4 + col] = v[11];
+    out[11 * 4 + col] = v[4];
+    out[12 * 4 + col] = v[13];
+    out[13 * 4 + col] = v[2];
+    out[14 * 4 + col] = v[15];
+    out[15 * 4 + col] = v[0];
+  }
+}
+
+static void round_shift_16x16(__m128i *in, int shift) {
+  round_shift_8x8(&in[0], shift);
+  round_shift_8x8(&in[16], shift);
+  round_shift_8x8(&in[32], shift);
+  round_shift_8x8(&in[48], shift);
+}
+
+void vp10_inv_txfm2d_add_16x16_sse4_1(const int32_t *coeff, uint16_t *output,
+                                      int stride, int tx_type, int bd) {
+  __m128i in[64], out[64];
+  const TXFM_2D_CFG *cfg = NULL;
+
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_16;
+      load_buffer_16x16(coeff, in);
+      transpose_16x16(in, out);
+      idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+      round_shift_16x16(in, -cfg->shift[0]);
+      transpose_16x16(in, out);
+      idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+    case DCT_ADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_16;
+      load_buffer_16x16(coeff, in);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+      round_shift_16x16(in, -cfg->shift[0]);
+      transpose_16x16(in, out);
+      idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+    case ADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_16;
+      load_buffer_16x16(coeff, in);
+      transpose_16x16(in, out);
+      idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+      round_shift_16x16(in, -cfg->shift[0]);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+    case ADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_16;
+      load_buffer_16x16(coeff, in);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+      round_shift_16x16(in, -cfg->shift[0]);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_16;
+      load_buffer_16x16(coeff, in);
+      transpose_16x16(in, out);
+      idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+      round_shift_16x16(in, -cfg->shift[0]);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_16x16(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      break;
+    case DCT_FLIPADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_16;
+      load_buffer_16x16(coeff, in);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+      round_shift_16x16(in, -cfg->shift[0]);
+      transpose_16x16(in, out);
+      idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_16x16(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      break;
+    case ADST_FLIPADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_16;
+      load_buffer_16x16(coeff, in);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+      round_shift_16x16(in, -cfg->shift[0]);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_16x16(in, output, stride, 1, 0, -cfg->shift[1], bd);
+      break;
+    case FLIPADST_FLIPADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_16;
+      load_buffer_16x16(coeff, in);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+      round_shift_16x16(in, -cfg->shift[0]);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_16x16(in, output, stride, 1, 1, -cfg->shift[1], bd);
+      break;
+    case FLIPADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_16;
+      load_buffer_16x16(coeff, in);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+      round_shift_16x16(in, -cfg->shift[0]);
+      transpose_16x16(in, out);
+      iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+      write_buffer_16x16(in, output, stride, 0, 1, -cfg->shift[1], bd);
+      break;
+#endif
+    default: assert(0);
+  }
+}
diff --git a/av1/common/x86/highbd_txfm_utility_sse4.h b/av1/common/x86/highbd_txfm_utility_sse4.h
new file mode 100644
index 0000000..f1e298d
--- /dev/null
+++ b/av1/common/x86/highbd_txfm_utility_sse4.h
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef HIGHBD_TXFM_UTILITY_SSE4_H_
+#define HIGHBD_TXFM_UTILITY_SSE4_H_
+
+#include <smmintrin.h> /* SSE4.1 */
+
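+// Transpose a 4x4 tile of 32-bit lanes: the epi32 unpacks interleave row
+// pairs, then the epi64 unpacks assemble the four columns.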
+#define TRANSPOSE_4X4(x0, x1, x2, x3, y0, y1, y2, y3) \
+  do {                                                \
+    __m128i u0, u1, u2, u3;                           \
+    u0 = _mm_unpacklo_epi32(x0, x1);                  \
+    u1 = _mm_unpackhi_epi32(x0, x1);                  \
+    u2 = _mm_unpacklo_epi32(x2, x3);                  \
+    u3 = _mm_unpackhi_epi32(x2, x3);                  \
+    y0 = _mm_unpacklo_epi64(u0, u2);                  \
+    y1 = _mm_unpackhi_epi64(u0, u2);                  \
+    y2 = _mm_unpacklo_epi64(u1, u3);                  \
+    y3 = _mm_unpackhi_epi64(u1, u3);                  \
+  } while (0)
+
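+// An 8x8 block of 32-bit coefficients occupies two __m128i per row
+// (in[2 * r], in[2 * r + 1]); the transpose applies four 4x4 tile
+// transposes, swapping the two off-diagonal tiles.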
+static INLINE void transpose_8x8(const __m128i *in, __m128i *out) {
+  TRANSPOSE_4X4(in[0], in[2], in[4], in[6], out[0], out[2], out[4], out[6]);
+  TRANSPOSE_4X4(in[1], in[3], in[5], in[7], out[8], out[10], out[12], out[14]);
+  TRANSPOSE_4X4(in[8], in[10], in[12], in[14], out[1], out[3], out[5], out[7]);
+  TRANSPOSE_4X4(in[9], in[11], in[13], in[15], out[9], out[11], out[13],
+                out[15]);
+}
+
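+// 16x16 layout: four __m128i per row (in[4 * r + i]); each 8x8 quadrant is
+// transposed from 4x4 tiles and the off-diagonal quadrants trade places.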
+static INLINE void transpose_16x16(const __m128i *in, __m128i *out) {
+  // Upper left 8x8
+  TRANSPOSE_4X4(in[0], in[4], in[8], in[12], out[0], out[4], out[8], out[12]);
+  TRANSPOSE_4X4(in[1], in[5], in[9], in[13], out[16], out[20], out[24],
+                out[28]);
+  TRANSPOSE_4X4(in[16], in[20], in[24], in[28], out[1], out[5], out[9],
+                out[13]);
+  TRANSPOSE_4X4(in[17], in[21], in[25], in[29], out[17], out[21], out[25],
+                out[29]);
+
+  // Upper right 8x8
+  TRANSPOSE_4X4(in[2], in[6], in[10], in[14], out[32], out[36], out[40],
+                out[44]);
+  TRANSPOSE_4X4(in[3], in[7], in[11], in[15], out[48], out[52], out[56],
+                out[60]);
+  TRANSPOSE_4X4(in[18], in[22], in[26], in[30], out[33], out[37], out[41],
+                out[45]);
+  TRANSPOSE_4X4(in[19], in[23], in[27], in[31], out[49], out[53], out[57],
+                out[61]);
+
+  // Lower left 8x8
+  TRANSPOSE_4X4(in[32], in[36], in[40], in[44], out[2], out[6], out[10],
+                out[14]);
+  TRANSPOSE_4X4(in[33], in[37], in[41], in[45], out[18], out[22], out[26],
+                out[30]);
+  TRANSPOSE_4X4(in[48], in[52], in[56], in[60], out[3], out[7], out[11],
+                out[15]);
+  TRANSPOSE_4X4(in[49], in[53], in[57], in[61], out[19], out[23], out[27],
+                out[31]);
+  // Lower right 8x8
+  TRANSPOSE_4X4(in[34], in[38], in[42], in[46], out[34], out[38], out[42],
+                out[46]);
+  TRANSPOSE_4X4(in[35], in[39], in[43], in[47], out[50], out[54], out[58],
+                out[62]);
+  TRANSPOSE_4X4(in[50], in[54], in[58], in[62], out[35], out[39], out[43],
+                out[47]);
+  TRANSPOSE_4X4(in[51], in[55], in[59], in[63], out[51], out[55], out[59],
+                out[63]);
+}
+
+// Note:
+//  rounding = 1 << (bit - 1)
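+//  half_btf computes, per 32-bit lane, (w0 * n0 + w1 * n1 + rounding) >> bit,
+//  i.e. one output of a transform butterfly stage.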
+static INLINE __m128i half_btf_sse4_1(__m128i w0, __m128i n0, __m128i w1,
+                                      __m128i n1, __m128i rounding, int bit) {
+  __m128i x, y;
+
+  x = _mm_mullo_epi32(w0, n0);
+  y = _mm_mullo_epi32(w1, n1);
+  x = _mm_add_epi32(x, y);
+  x = _mm_add_epi32(x, rounding);
+  x = _mm_srai_epi32(x, bit);
+  return x;
+}
+
+#endif  // HIGHBD_TXFM_UTILITY_SSE4_H_
diff --git a/av1/common/x86/idct_intrin_sse2.c b/av1/common/x86/idct_intrin_sse2.c
new file mode 100644
index 0000000..70bf9bf
--- /dev/null
+++ b/av1/common/x86/idct_intrin_sse2.c
@@ -0,0 +1,304 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "aom_dsp/x86/inv_txfm_sse2.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+#include "aom_ports/mem.h"
+#include "av1/common/enums.h"
+
+#if CONFIG_EXT_TX
+static INLINE void fliplr_4x4(__m128i in[2]) {
+  in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
+  in[0] = _mm_shufflehi_epi16(in[0], 0x1b);
+  in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
+  in[1] = _mm_shufflehi_epi16(in[1], 0x1b);
+}
+
+static INLINE void fliplr_8x8(__m128i in[8]) {
+  in[0] = mm_reverse_epi16(in[0]);
+  in[1] = mm_reverse_epi16(in[1]);
+  in[2] = mm_reverse_epi16(in[2]);
+  in[3] = mm_reverse_epi16(in[3]);
+
+  in[4] = mm_reverse_epi16(in[4]);
+  in[5] = mm_reverse_epi16(in[5]);
+  in[6] = mm_reverse_epi16(in[6]);
+  in[7] = mm_reverse_epi16(in[7]);
+}
+
+static INLINE void fliplr_16x8(__m128i in[16]) {
+  fliplr_8x8(&in[0]);
+  fliplr_8x8(&in[8]);
+}
+
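+// Mirror a 16-wide block: reverse the 8 pixels inside each register half,
+// then swap the two half-buffers by exchanging pointers.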
+#define FLIPLR_16x16(in0, in1) \
+  do {                         \
+    __m128i *tmp;              \
+    fliplr_16x8(in0);          \
+    fliplr_16x8(in1);          \
+    tmp = (in0);               \
+    (in0) = (in1);             \
+    (in1) = tmp;               \
+  } while (0)
+
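+// Vertical flips need no data movement: point dest at the last row and
+// negate the stride so reconstruction proceeds bottom-up.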
+#define FLIPUD_PTR(dest, stride, size)       \
+  do {                                       \
+    (dest) = (dest) + ((size)-1) * (stride); \
+    (stride) = -(stride);                    \
+  } while (0)
+#endif
+
+void vp10_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
+                             int tx_type) {
+  __m128i in[2];
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i eight = _mm_set1_epi16(8);
+
+  in[0] = load_input_data(input);
+  in[1] = load_input_data(input + 8);
+
+  switch (tx_type) {
+    case DCT_DCT:
+      idct4_sse2(in);
+      idct4_sse2(in);
+      break;
+    case ADST_DCT:
+      idct4_sse2(in);
+      iadst4_sse2(in);
+      break;
+    case DCT_ADST:
+      iadst4_sse2(in);
+      idct4_sse2(in);
+      break;
+    case ADST_ADST:
+      iadst4_sse2(in);
+      iadst4_sse2(in);
+      break;
+#if CONFIG_EXT_TX
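+    // The FLIPADST variants reuse the plain ADST kernels; the flips are
+    // applied afterwards: up-down via FLIPUD_PTR on the destination, and
+    // left-right via fliplr lane reversal on the residual.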
+    case FLIPADST_DCT:
+      idct4_sse2(in);
+      iadst4_sse2(in);
+      FLIPUD_PTR(dest, stride, 4);
+      break;
+    case DCT_FLIPADST:
+      iadst4_sse2(in);
+      idct4_sse2(in);
+      fliplr_4x4(in);
+      break;
+    case FLIPADST_FLIPADST:
+      iadst4_sse2(in);
+      iadst4_sse2(in);
+      FLIPUD_PTR(dest, stride, 4);
+      fliplr_4x4(in);
+      break;
+    case ADST_FLIPADST:
+      iadst4_sse2(in);
+      iadst4_sse2(in);
+      fliplr_4x4(in);
+      break;
+    case FLIPADST_ADST:
+      iadst4_sse2(in);
+      iadst4_sse2(in);
+      FLIPUD_PTR(dest, stride, 4);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+
+  // Final round and shift
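+  // (x + 8) >> 4 rounds to nearest, i.e. ROUND_POWER_OF_TWO(x, 4).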
+  in[0] = _mm_add_epi16(in[0], eight);
+  in[1] = _mm_add_epi16(in[1], eight);
+
+  in[0] = _mm_srai_epi16(in[0], 4);
+  in[1] = _mm_srai_epi16(in[1], 4);
+
+  // Reconstruction and Store
+  {
+    __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 0));
+    __m128i d1 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 1));
+    __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
+    __m128i d3 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 3));
+    d0 = _mm_unpacklo_epi32(d0, d1);
+    d2 = _mm_unpacklo_epi32(d2, d3);
+    d0 = _mm_unpacklo_epi8(d0, zero);
+    d2 = _mm_unpacklo_epi8(d2, zero);
+    d0 = _mm_add_epi16(d0, in[0]);
+    d2 = _mm_add_epi16(d2, in[1]);
+    d0 = _mm_packus_epi16(d0, d2);
+    // store result[0]
+    *(int *)dest = _mm_cvtsi128_si32(d0);
+    // store result[1]
+    d0 = _mm_srli_si128(d0, 4);
+    *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
+    // store result[2]
+    d0 = _mm_srli_si128(d0, 4);
+    *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
+    // store result[3]
+    d0 = _mm_srli_si128(d0, 4);
+    *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
+  }
+}
+
+void vp10_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
+                             int tx_type) {
+  __m128i in[8];
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i final_rounding = _mm_set1_epi16(1 << 4);
+
+  // load input data
+  in[0] = load_input_data(input);
+  in[1] = load_input_data(input + 8 * 1);
+  in[2] = load_input_data(input + 8 * 2);
+  in[3] = load_input_data(input + 8 * 3);
+  in[4] = load_input_data(input + 8 * 4);
+  in[5] = load_input_data(input + 8 * 5);
+  in[6] = load_input_data(input + 8 * 6);
+  in[7] = load_input_data(input + 8 * 7);
+
+  switch (tx_type) {
+    case DCT_DCT:
+      idct8_sse2(in);
+      idct8_sse2(in);
+      break;
+    case ADST_DCT:
+      idct8_sse2(in);
+      iadst8_sse2(in);
+      break;
+    case DCT_ADST:
+      iadst8_sse2(in);
+      idct8_sse2(in);
+      break;
+    case ADST_ADST:
+      iadst8_sse2(in);
+      iadst8_sse2(in);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+      idct8_sse2(in);
+      iadst8_sse2(in);
+      FLIPUD_PTR(dest, stride, 8);
+      break;
+    case DCT_FLIPADST:
+      iadst8_sse2(in);
+      idct8_sse2(in);
+      fliplr_8x8(in);
+      break;
+    case FLIPADST_FLIPADST:
+      iadst8_sse2(in);
+      iadst8_sse2(in);
+      FLIPUD_PTR(dest, stride, 8);
+      fliplr_8x8(in);
+      break;
+    case ADST_FLIPADST:
+      iadst8_sse2(in);
+      iadst8_sse2(in);
+      fliplr_8x8(in);
+      break;
+    case FLIPADST_ADST:
+      iadst8_sse2(in);
+      iadst8_sse2(in);
+      FLIPUD_PTR(dest, stride, 8);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+
+  // Final rounding and shift
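+  // final_rounding is 1 << 4, so (x + 16) >> 5 rounds to nearest; the
+  // saturating adds guard against 16-bit overflow before the shift.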
+  in[0] = _mm_adds_epi16(in[0], final_rounding);
+  in[1] = _mm_adds_epi16(in[1], final_rounding);
+  in[2] = _mm_adds_epi16(in[2], final_rounding);
+  in[3] = _mm_adds_epi16(in[3], final_rounding);
+  in[4] = _mm_adds_epi16(in[4], final_rounding);
+  in[5] = _mm_adds_epi16(in[5], final_rounding);
+  in[6] = _mm_adds_epi16(in[6], final_rounding);
+  in[7] = _mm_adds_epi16(in[7], final_rounding);
+
+  in[0] = _mm_srai_epi16(in[0], 5);
+  in[1] = _mm_srai_epi16(in[1], 5);
+  in[2] = _mm_srai_epi16(in[2], 5);
+  in[3] = _mm_srai_epi16(in[3], 5);
+  in[4] = _mm_srai_epi16(in[4], 5);
+  in[5] = _mm_srai_epi16(in[5], 5);
+  in[6] = _mm_srai_epi16(in[6], 5);
+  in[7] = _mm_srai_epi16(in[7], 5);
+
+  RECON_AND_STORE(dest + 0 * stride, in[0]);
+  RECON_AND_STORE(dest + 1 * stride, in[1]);
+  RECON_AND_STORE(dest + 2 * stride, in[2]);
+  RECON_AND_STORE(dest + 3 * stride, in[3]);
+  RECON_AND_STORE(dest + 4 * stride, in[4]);
+  RECON_AND_STORE(dest + 5 * stride, in[5]);
+  RECON_AND_STORE(dest + 6 * stride, in[6]);
+  RECON_AND_STORE(dest + 7 * stride, in[7]);
+}
+
+void vp10_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
+                                int stride, int tx_type) {
+  __m128i in[32];
+  __m128i *in0 = &in[0];
+  __m128i *in1 = &in[16];
+
+  load_buffer_8x16(input, in0);
+  input += 8;
+  load_buffer_8x16(input, in1);
+
+  switch (tx_type) {
+    case DCT_DCT:
+      idct16_sse2(in0, in1);
+      idct16_sse2(in0, in1);
+      break;
+    case ADST_DCT:
+      idct16_sse2(in0, in1);
+      iadst16_sse2(in0, in1);
+      break;
+    case DCT_ADST:
+      iadst16_sse2(in0, in1);
+      idct16_sse2(in0, in1);
+      break;
+    case ADST_ADST:
+      iadst16_sse2(in0, in1);
+      iadst16_sse2(in0, in1);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+      idct16_sse2(in0, in1);
+      iadst16_sse2(in0, in1);
+      FLIPUD_PTR(dest, stride, 16);
+      break;
+    case DCT_FLIPADST:
+      iadst16_sse2(in0, in1);
+      idct16_sse2(in0, in1);
+      FLIPLR_16x16(in0, in1);
+      break;
+    case FLIPADST_FLIPADST:
+      iadst16_sse2(in0, in1);
+      iadst16_sse2(in0, in1);
+      FLIPUD_PTR(dest, stride, 16);
+      FLIPLR_16x16(in0, in1);
+      break;
+    case ADST_FLIPADST:
+      iadst16_sse2(in0, in1);
+      iadst16_sse2(in0, in1);
+      FLIPLR_16x16(in0, in1);
+      break;
+    case FLIPADST_ADST:
+      iadst16_sse2(in0, in1);
+      iadst16_sse2(in0, in1);
+      FLIPUD_PTR(dest, stride, 16);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+
+  write_buffer_8x16(dest, in0, stride);
+  dest += 8;
+  write_buffer_8x16(dest, in1, stride);
+}
diff --git a/av1/common/x86/reconintra_sse4.c b/av1/common/x86/reconintra_sse4.c
new file mode 100644
index 0000000..cac34a6
--- /dev/null
+++ b/av1/common/x86/reconintra_sse4.c
@@ -0,0 +1,891 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <smmintrin.h>
+
+#include "./vp10_rtcd.h"
+#include "aom_ports/mem.h"
+#include "av1/common/enums.h"
+#include "av1/common/intra_filters.h"
+
+static INLINE void AddPixelsSmall(const uint8_t *above, const uint8_t *left,
+                                  __m128i *sum) {
+  const __m128i a = _mm_loadu_si128((const __m128i *)above);
+  const __m128i l = _mm_loadu_si128((const __m128i *)left);
+  const __m128i zero = _mm_setzero_si128();
+
+  __m128i u0 = _mm_unpacklo_epi8(a, zero);
+  __m128i u1 = _mm_unpacklo_epi8(l, zero);
+
+  sum[0] = _mm_add_epi16(u0, u1);
+}
+
+static INLINE int GetMeanValue4x4(const uint8_t *above, const uint8_t *left,
+                                  __m128i *params) {
+  const __m128i zero = _mm_setzero_si128();
+  __m128i sum_vector, u;
+  uint16_t sum_value;
+
+  AddPixelsSmall(above, left, &sum_vector);
+
+  sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 2 values
+  u = _mm_srli_si128(sum_vector, 2);
+  sum_vector = _mm_add_epi16(sum_vector, u);
+
+  sum_value = _mm_extract_epi16(sum_vector, 0);
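+  // Rounded mean of the 8 reference pixels (4 above + 4 left); the larger
+  // block sizes below follow the same (sum + bs) >> log2(2 * bs) pattern.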
+  sum_value += 4;
+  sum_value >>= 3;
+  *params = _mm_set1_epi32(sum_value);
+  return sum_value;
+}
+
+static INLINE int GetMeanValue8x8(const uint8_t *above, const uint8_t *left,
+                                  __m128i *params) {
+  const __m128i zero = _mm_setzero_si128();
+  __m128i sum_vector, u;
+  uint16_t sum_value;
+
+  AddPixelsSmall(above, left, &sum_vector);
+
+  sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 4 values
+  sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 2 values
+
+  u = _mm_srli_si128(sum_vector, 2);
+  sum_vector = _mm_add_epi16(sum_vector, u);
+
+  sum_value = _mm_extract_epi16(sum_vector, 0);
+  sum_value += 8;
+  sum_value >>= 4;
+  *params = _mm_set1_epi32(sum_value);
+  return sum_value;
+}
+
+static INLINE void AddPixelsLarge(const uint8_t *above, const uint8_t *left,
+                                  __m128i *sum) {
+  const __m128i a = _mm_loadu_si128((const __m128i *)above);
+  const __m128i l = _mm_loadu_si128((const __m128i *)left);
+  const __m128i zero = _mm_setzero_si128();
+
+  __m128i u0 = _mm_unpacklo_epi8(a, zero);
+  __m128i u1 = _mm_unpacklo_epi8(l, zero);
+
+  sum[0] = _mm_add_epi16(u0, u1);
+
+  u0 = _mm_unpackhi_epi8(a, zero);
+  u1 = _mm_unpackhi_epi8(l, zero);
+
+  sum[0] = _mm_add_epi16(sum[0], u0);
+  sum[0] = _mm_add_epi16(sum[0], u1);
+}
+
+static INLINE int GetMeanValue16x16(const uint8_t *above, const uint8_t *left,
+                                    __m128i *params) {
+  const __m128i zero = _mm_setzero_si128();
+  __m128i sum_vector, u;
+  uint16_t sum_value;
+
+  AddPixelsLarge(above, left, &sum_vector);
+
+  sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 4 values
+  sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 2 values
+
+  u = _mm_srli_si128(sum_vector, 2);
+  sum_vector = _mm_add_epi16(sum_vector, u);
+
+  sum_value = _mm_extract_epi16(sum_vector, 0);
+  sum_value += 16;
+  sum_value >>= 5;
+  *params = _mm_set1_epi32(sum_value);
+  return sum_value;
+}
+
+static INLINE int GetMeanValue32x32(const uint8_t *above, const uint8_t *left,
+                                    __m128i *params) {
+  const __m128i zero = _mm_setzero_si128();
+  __m128i sum_vector[2], u;
+  uint16_t sum_value;
+
+  AddPixelsLarge(above, left, &sum_vector[0]);
+  AddPixelsLarge(above + 16, left + 16, &sum_vector[1]);
+
+  sum_vector[0] = _mm_add_epi16(sum_vector[0], sum_vector[1]);
+  sum_vector[0] = _mm_hadd_epi16(sum_vector[0], zero);  // still has 4 values
+  sum_vector[0] = _mm_hadd_epi16(sum_vector[0], zero);  // still has 2 values
+
+  u = _mm_srli_si128(sum_vector[0], 2);
+  sum_vector[0] = _mm_add_epi16(sum_vector[0], u);
+
+  sum_value = _mm_extract_epi16(sum_vector[0], 0);
+  sum_value += 32;
+  sum_value >>= 6;
+  *params = _mm_set1_epi32(sum_value);
+  return sum_value;
+}
+
+// Note:
+//  params[4] holds the mean value, replicated across four int32_t lanes.
+//
+static INLINE int CalcRefPixelsMeanValue(const uint8_t *above,
+                                         const uint8_t *left, int bs,
+                                         __m128i *params) {
+  int meanValue = 0;
+  switch (bs) {
+    case 4: meanValue = GetMeanValue4x4(above, left, params); break;
+    case 8: meanValue = GetMeanValue8x8(above, left, params); break;
+    case 16: meanValue = GetMeanValue16x16(above, left, params); break;
+    case 32: meanValue = GetMeanValue32x32(above, left, params); break;
+    default: assert(0);
+  }
+  return meanValue;
+}
+
+// Note:
+//  params[0..3] hold the four filter taps, one int32_t broadcast per tap.
+//
+static INLINE void GetIntraFilterParams(int bs, int mode, __m128i *params) {
+  const TX_SIZE tx_size =
+      (bs == 32) ? TX_32X32
+                 : ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4)));
+  // c0
+  params[0] = _mm_set1_epi32(filter_intra_taps_4[tx_size][mode][0]);
+  // c1
+  params[1] = _mm_set1_epi32(filter_intra_taps_4[tx_size][mode][1]);
+  // c2
+  params[2] = _mm_set1_epi32(filter_intra_taps_4[tx_size][mode][2]);
+  // c3
+  params[3] = _mm_set1_epi32(filter_intra_taps_4[tx_size][mode][3]);
+}
+
+static const int maxBlkSize = 32;
+
+static INLINE void SavePred4x4(int *pred, const __m128i *mean, uint8_t *dst,
+                               ptrdiff_t stride) {
+  const int predStride = (maxBlkSize << 1) + 1;
+  __m128i p0 = _mm_loadu_si128((const __m128i *)pred);
+  __m128i p1 = _mm_loadu_si128((const __m128i *)(pred + predStride));
+  __m128i p2 = _mm_loadu_si128((const __m128i *)(pred + 2 * predStride));
+  __m128i p3 = _mm_loadu_si128((const __m128i *)(pred + 3 * predStride));
+
+  p0 = _mm_add_epi32(p0, mean[0]);
+  p1 = _mm_add_epi32(p1, mean[0]);
+  p2 = _mm_add_epi32(p2, mean[0]);
+  p3 = _mm_add_epi32(p3, mean[0]);
+
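+  // packus_epi32 then packus_epi16 narrow 32 -> 16 -> 8 bits with unsigned
+  // saturation, which performs the pixel clamping.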
+  p0 = _mm_packus_epi32(p0, p1);
+  p1 = _mm_packus_epi32(p2, p3);
+  p0 = _mm_packus_epi16(p0, p1);
+
+  *((int *)dst) = _mm_cvtsi128_si32(p0);
+  p0 = _mm_srli_si128(p0, 4);
+  *((int *)(dst + stride)) = _mm_cvtsi128_si32(p0);
+  p0 = _mm_srli_si128(p0, 4);
+  *((int *)(dst + 2 * stride)) = _mm_cvtsi128_si32(p0);
+  p0 = _mm_srli_si128(p0, 4);
+  *((int *)(dst + 3 * stride)) = _mm_cvtsi128_si32(p0);
+}
+
+static void SavePred8x8(int *pred, const __m128i *mean, uint8_t *dst,
+                        ptrdiff_t stride) {
+  const int predStride = (maxBlkSize << 1) + 1;
+  __m128i p0, p1, p2, p3;
+  int r = 0;
+
+  while (r < 8) {
+    p0 = _mm_loadu_si128((const __m128i *)(pred + r * predStride));
+    p1 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 4));
+    r += 1;
+    p2 = _mm_loadu_si128((const __m128i *)(pred + r * predStride));
+    p3 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 4));
+
+    p0 = _mm_add_epi32(p0, mean[0]);
+    p1 = _mm_add_epi32(p1, mean[0]);
+    p2 = _mm_add_epi32(p2, mean[0]);
+    p3 = _mm_add_epi32(p3, mean[0]);
+
+    p0 = _mm_packus_epi32(p0, p1);
+    p1 = _mm_packus_epi32(p2, p3);
+    p0 = _mm_packus_epi16(p0, p1);
+
+    _mm_storel_epi64((__m128i *)dst, p0);
+    dst += stride;
+    p0 = _mm_srli_si128(p0, 8);
+    _mm_storel_epi64((__m128i *)dst, p0);
+    dst += stride;
+    r += 1;
+  }
+}
+
+static void SavePred16x16(int *pred, const __m128i *mean, uint8_t *dst,
+                          ptrdiff_t stride) {
+  const int predStride = (maxBlkSize << 1) + 1;
+  __m128i p0, p1, p2, p3;
+  int r = 0;
+
+  while (r < 16) {
+    p0 = _mm_loadu_si128((const __m128i *)(pred + r * predStride));
+    p1 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 4));
+    p2 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 8));
+    p3 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 12));
+
+    p0 = _mm_add_epi32(p0, mean[0]);
+    p1 = _mm_add_epi32(p1, mean[0]);
+    p2 = _mm_add_epi32(p2, mean[0]);
+    p3 = _mm_add_epi32(p3, mean[0]);
+
+    p0 = _mm_packus_epi32(p0, p1);
+    p1 = _mm_packus_epi32(p2, p3);
+    p0 = _mm_packus_epi16(p0, p1);
+
+    _mm_storel_epi64((__m128i *)dst, p0);
+    p0 = _mm_srli_si128(p0, 8);
+    _mm_storel_epi64((__m128i *)(dst + 8), p0);
+    dst += stride;
+    r += 1;
+  }
+}
+
+static void SavePred32x32(int *pred, const __m128i *mean, uint8_t *dst,
+                          ptrdiff_t stride) {
+  const int predStride = (maxBlkSize << 1) + 1;
+  __m128i p0, p1, p2, p3, p4, p5, p6, p7;
+  int r = 0;
+
+  while (r < 32) {
+    p0 = _mm_loadu_si128((const __m128i *)(pred + r * predStride));
+    p1 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 4));
+    p2 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 8));
+    p3 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 12));
+
+    p4 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 16));
+    p5 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 20));
+    p6 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 24));
+    p7 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 28));
+
+    p0 = _mm_add_epi32(p0, mean[0]);
+    p1 = _mm_add_epi32(p1, mean[0]);
+    p2 = _mm_add_epi32(p2, mean[0]);
+    p3 = _mm_add_epi32(p3, mean[0]);
+
+    p4 = _mm_add_epi32(p4, mean[0]);
+    p5 = _mm_add_epi32(p5, mean[0]);
+    p6 = _mm_add_epi32(p6, mean[0]);
+    p7 = _mm_add_epi32(p7, mean[0]);
+
+    p0 = _mm_packus_epi32(p0, p1);
+    p1 = _mm_packus_epi32(p2, p3);
+    p0 = _mm_packus_epi16(p0, p1);
+
+    p4 = _mm_packus_epi32(p4, p5);
+    p5 = _mm_packus_epi32(p6, p7);
+    p4 = _mm_packus_epi16(p4, p5);
+
+    _mm_storel_epi64((__m128i *)dst, p0);
+    p0 = _mm_srli_si128(p0, 8);
+    _mm_storel_epi64((__m128i *)(dst + 8), p0);
+
+    _mm_storel_epi64((__m128i *)(dst + 16), p4);
+    p4 = _mm_srli_si128(p4, 8);
+    _mm_storel_epi64((__m128i *)(dst + 24), p4);
+
+    dst += stride;
+    r += 1;
+  }
+}
+
+static void SavePrediction(int *pred, const __m128i *mean, int bs, uint8_t *dst,
+                           ptrdiff_t stride) {
+  switch (bs) {
+    case 4: SavePred4x4(pred, mean, dst, stride); break;
+    case 8: SavePred8x8(pred, mean, dst, stride); break;
+    case 16: SavePred16x16(pred, mean, dst, stride); break;
+    case 32: SavePred32x32(pred, mean, dst, stride); break;
+    default: assert(0);
+  }
+}
+
+typedef void (*ProducePixelsFunc)(__m128i *p, const __m128i *prm, int *pred,
+                                  const int predStride);
+
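+// Each Produce*Pixels call weights three neighbors from the row above by
+// taps prm[2], prm[0] and prm[3] in one vector multiply, then resolves the
+// outputs lane by lane, because each pixel also needs c0 (prm[1]) times the
+// pixel just produced to its left.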
+static void ProduceFourPixels(__m128i *p, const __m128i *prm, int *pred,
+                              const int predStride) {
+  __m128i u0, u1, u2;
+  int c0 = _mm_extract_epi32(prm[1], 0);
+  int x = *(pred + predStride);
+  int sum;
+
+  u0 = _mm_mullo_epi32(p[0], prm[2]);
+  u1 = _mm_mullo_epi32(p[1], prm[0]);
+  u2 = _mm_mullo_epi32(p[2], prm[3]);
+
+  u0 = _mm_add_epi32(u0, u1);
+  u0 = _mm_add_epi32(u0, u2);
+
+  sum = _mm_extract_epi32(u0, 0);
+  sum += c0 * x;
+  x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  *(pred + predStride + 1) = x;
+
+  sum = _mm_extract_epi32(u0, 1);
+  sum += c0 * x;
+  x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  *(pred + predStride + 2) = x;
+
+  sum = _mm_extract_epi32(u0, 2);
+  sum += c0 * x;
+  x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  *(pred + predStride + 3) = x;
+
+  sum = _mm_extract_epi32(u0, 3);
+  sum += c0 * x;
+  x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  *(pred + predStride + 4) = x;
+}
+
+static void ProduceThreePixels(__m128i *p, const __m128i *prm, int *pred,
+                               const int predStride) {
+  __m128i u0, u1, u2;
+  int c0 = _mm_extract_epi32(prm[1], 0);
+  int x = *(pred + predStride);
+  int sum;
+
+  u0 = _mm_mullo_epi32(p[0], prm[2]);
+  u1 = _mm_mullo_epi32(p[1], prm[0]);
+  u2 = _mm_mullo_epi32(p[2], prm[3]);
+
+  u0 = _mm_add_epi32(u0, u1);
+  u0 = _mm_add_epi32(u0, u2);
+
+  sum = _mm_extract_epi32(u0, 0);
+  sum += c0 * x;
+  x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  *(pred + predStride + 1) = x;
+
+  sum = _mm_extract_epi32(u0, 1);
+  sum += c0 * x;
+  x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  *(pred + predStride + 2) = x;
+
+  sum = _mm_extract_epi32(u0, 2);
+  sum += c0 * x;
+  x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  *(pred + predStride + 3) = x;
+}
+
+static void ProduceTwoPixels(__m128i *p, const __m128i *prm, int *pred,
+                             const int predStride) {
+  __m128i u0, u1, u2;
+  int c0 = _mm_extract_epi32(prm[1], 0);
+  int x = *(pred + predStride);
+  int sum;
+
+  u0 = _mm_mullo_epi32(p[0], prm[2]);
+  u1 = _mm_mullo_epi32(p[1], prm[0]);
+  u2 = _mm_mullo_epi32(p[2], prm[3]);
+
+  u0 = _mm_add_epi32(u0, u1);
+  u0 = _mm_add_epi32(u0, u2);
+
+  sum = _mm_extract_epi32(u0, 0);
+  sum += c0 * x;
+  x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  *(pred + predStride + 1) = x;
+
+  sum = _mm_extract_epi32(u0, 1);
+  sum += c0 * x;
+  x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  *(pred + predStride + 2) = x;
+}
+
+static void ProduceOnePixels(__m128i *p, const __m128i *prm, int *pred,
+                             const int predStride) {
+  __m128i u0, u1, u2;
+  int c0 = _mm_extract_epi32(prm[1], 0);
+  int x = *(pred + predStride);
+  int sum;
+
+  u0 = _mm_mullo_epi32(p[0], prm[2]);
+  u1 = _mm_mullo_epi32(p[1], prm[0]);
+  u2 = _mm_mullo_epi32(p[2], prm[3]);
+
+  u0 = _mm_add_epi32(u0, u1);
+  u0 = _mm_add_epi32(u0, u2);
+
+  sum = _mm_extract_epi32(u0, 0);
+  sum += c0 * x;
+  x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  *(pred + predStride + 1) = x;
+}
+
+static ProducePixelsFunc prodPixelsFuncTab[4] = {
+  ProduceOnePixels, ProduceTwoPixels, ProduceThreePixels, ProduceFourPixels
+};
+
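+// remain counts the pixels left in the current canvas row; each output
+// needs three above-row neighbors, so only remain - 2 outputs fit, and at
+// most four are produced per call.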
+static void ProducePixels(int *pred, const __m128i *prm, int remain) {
+  __m128i p[3];
+  const int predStride = (maxBlkSize << 1) + 1;
+  int index;
+
+  p[0] = _mm_loadu_si128((const __m128i *)pred);
+  p[1] = _mm_loadu_si128((const __m128i *)(pred + 1));
+  p[2] = _mm_loadu_si128((const __m128i *)(pred + 2));
+
+  if (remain <= 2) {
+    return;
+  }
+  if (remain > 5) {
+    index = 3;
+  } else {
+    index = remain - 3;
+  }
+  prodPixelsFuncTab[index](p, prm, pred, predStride);
+}
+
+// Note:
+//  At row r, column c, the remaining pixels are R = 2 * bs + 1 - r - c;
+//  the number of pixels still to produce is R - 2 = 2 * bs - r - c - 1.
+static void GeneratePrediction(const uint8_t *above, const uint8_t *left,
+                               const int bs, const __m128i *prm, int meanValue,
+                               uint8_t *dst, ptrdiff_t stride) {
+  int pred[33][65];
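+  // (maxBlkSize + 1) x (2 * maxBlkSize + 1) canvas: mean-removed reference
+  // pixels in row 0 and column 0, predicted values in the interior.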
+  int r, c, colBound;
+  int remainings;
+
+  for (r = 0; r < bs; ++r) {
+    pred[r + 1][0] = (int)left[r] - meanValue;
+  }
+
+  above -= 1;
+  for (c = 0; c < 2 * bs + 1; ++c) {
+    pred[0][c] = (int)above[c] - meanValue;
+  }
+
+  r = 0;
+  while (r < bs) {
+    colBound = (bs << 1) - r;
+    for (c = 0; c < colBound; c += 4) {
+      remainings = colBound - c + 1;
+      ProducePixels(&pred[r][c], prm, remainings);
+    }
+    r += 1;
+  }
+
+  SavePrediction(&pred[1][1], &prm[4], bs, dst, stride);
+}
+
+static void FilterPrediction(const uint8_t *above, const uint8_t *left, int bs,
+                             __m128i *prm, uint8_t *dst, ptrdiff_t stride) {
+  int meanValue = 0;
+  meanValue = CalcRefPixelsMeanValue(above, left, bs, &prm[4]);
+  GeneratePrediction(above, left, bs, prm, meanValue, dst, stride);
+}
+
+void vp10_dc_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                     const uint8_t *above,
+                                     const uint8_t *left) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, DC_PRED, &prm[0]);
+  FilterPrediction(above, left, bs, prm, dst, stride);
+}
+
+void vp10_v_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                    const uint8_t *above, const uint8_t *left) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, V_PRED, &prm[0]);
+  FilterPrediction(above, left, bs, prm, dst, stride);
+}
+
+void vp10_h_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                    const uint8_t *above, const uint8_t *left) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, H_PRED, &prm[0]);
+  FilterPrediction(above, left, bs, prm, dst, stride);
+}
+
+void vp10_d45_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                      const uint8_t *above,
+                                      const uint8_t *left) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D45_PRED, &prm[0]);
+  FilterPrediction(above, left, bs, prm, dst, stride);
+}
+
+void vp10_d135_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                       const uint8_t *above,
+                                       const uint8_t *left) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D135_PRED, &prm[0]);
+  FilterPrediction(above, left, bs, prm, dst, stride);
+}
+
+void vp10_d117_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                       const uint8_t *above,
+                                       const uint8_t *left) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D117_PRED, &prm[0]);
+  FilterPrediction(above, left, bs, prm, dst, stride);
+}
+
+void vp10_d153_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                       const uint8_t *above,
+                                       const uint8_t *left) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D153_PRED, &prm[0]);
+  FilterPrediction(above, left, bs, prm, dst, stride);
+}
+
+void vp10_d207_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                       const uint8_t *above,
+                                       const uint8_t *left) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D207_PRED, &prm[0]);
+  FilterPrediction(above, left, bs, prm, dst, stride);
+}
+
+void vp10_d63_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                      const uint8_t *above,
+                                      const uint8_t *left) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D63_PRED, &prm[0]);
+  FilterPrediction(above, left, bs, prm, dst, stride);
+}
+
+void vp10_tm_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                     const uint8_t *above,
+                                     const uint8_t *left) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, TM_PRED, &prm[0]);
+  FilterPrediction(above, left, bs, prm, dst, stride);
+}
+
+// ============== High Bit Depth ==============
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE int HighbdGetMeanValue4x4(const uint16_t *above,
+                                        const uint16_t *left, const int bd,
+                                        __m128i *params) {
+  const __m128i a = _mm_loadu_si128((const __m128i *)above);
+  const __m128i l = _mm_loadu_si128((const __m128i *)left);
+  const __m128i zero = _mm_setzero_si128();
+  __m128i sum_vector, u;
+  uint16_t sum_value;
+  (void)bd;
+
+  sum_vector = _mm_add_epi16(a, l);
+
+  sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 2 values
+  u = _mm_srli_si128(sum_vector, 2);
+  sum_vector = _mm_add_epi16(sum_vector, u);
+
+  sum_value = _mm_extract_epi16(sum_vector, 0);
+  sum_value += 4;
+  sum_value >>= 3;
+  *params = _mm_set1_epi32(sum_value);
+  return sum_value;
+}
+
+static INLINE int HighbdGetMeanValue8x8(const uint16_t *above,
+                                        const uint16_t *left, const int bd,
+                                        __m128i *params) {
+  const __m128i a = _mm_loadu_si128((const __m128i *)above);
+  const __m128i l = _mm_loadu_si128((const __m128i *)left);
+  const __m128i zero = _mm_setzero_si128();
+  __m128i sum_vector, u;
+  uint16_t sum_value;
+  (void)bd;
+
+  sum_vector = _mm_add_epi16(a, l);
+
+  sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 4 values
+  sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 2 values
+
+  u = _mm_srli_si128(sum_vector, 2);
+  sum_vector = _mm_add_epi16(sum_vector, u);
+
+  sum_value = _mm_extract_epi16(sum_vector, 0);
+  sum_value += 8;
+  sum_value >>= 4;
+  *params = _mm_set1_epi32(sum_value);
+  return sum_value;
+}
+
+// Note:
+//  Sums 16 pixels each from above and left at 10-bit depth; the totals
+//  still fit in unsigned 16-bit lanes, so no widening is needed.
+static INLINE void AddPixels10bit(const uint16_t *above, const uint16_t *left,
+                                  __m128i *sum) {
+  __m128i a = _mm_loadu_si128((const __m128i *)above);
+  __m128i l = _mm_loadu_si128((const __m128i *)left);
+  sum[0] = _mm_add_epi16(a, l);
+  a = _mm_loadu_si128((const __m128i *)(above + 8));
+  l = _mm_loadu_si128((const __m128i *)(left + 8));
+  sum[0] = _mm_add_epi16(sum[0], a);
+  sum[0] = _mm_add_epi16(sum[0], l);
+}
+
+// Note:
+//  Sums 16 pixels each from above and left at 12-bit depth; the totals can
+//  exceed 16 bits, so the pixels are widened to 32-bit lanes before adding.
+static INLINE void AddPixels12bit(const uint16_t *above, const uint16_t *left,
+                                  __m128i *sum) {
+  __m128i a = _mm_loadu_si128((const __m128i *)above);
+  __m128i l = _mm_loadu_si128((const __m128i *)left);
+  const __m128i zero = _mm_setzero_si128();
+  __m128i v0, v1;
+
+  v0 = _mm_unpacklo_epi16(a, zero);
+  v1 = _mm_unpacklo_epi16(l, zero);
+  sum[0] = _mm_add_epi32(v0, v1);
+
+  v0 = _mm_unpackhi_epi16(a, zero);
+  v1 = _mm_unpackhi_epi16(l, zero);
+  sum[0] = _mm_add_epi32(sum[0], v0);
+  sum[0] = _mm_add_epi32(sum[0], v1);
+
+  a = _mm_loadu_si128((const __m128i *)(above + 8));
+  l = _mm_loadu_si128((const __m128i *)(left + 8));
+
+  v0 = _mm_unpacklo_epi16(a, zero);
+  v1 = _mm_unpacklo_epi16(l, zero);
+  sum[0] = _mm_add_epi32(sum[0], v0);
+  sum[0] = _mm_add_epi32(sum[0], v1);
+
+  v0 = _mm_unpackhi_epi16(a, zero);
+  v1 = _mm_unpackhi_epi16(l, zero);
+  sum[0] = _mm_add_epi32(sum[0], v0);
+  sum[0] = _mm_add_epi32(sum[0], v1);
+}
+
+static INLINE int HighbdGetMeanValue16x16(const uint16_t *above,
+                                          const uint16_t *left, const int bd,
+                                          __m128i *params) {
+  const __m128i zero = _mm_setzero_si128();
+  __m128i sum_vector, u;
+  uint32_t sum_value = 0;
+
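+  // Only the 10- and 12-bit paths are handled: 10-bit sums of 32 pixels
+  // still fit in 16-bit lanes, while 12-bit sums must be widened to 32 bits.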
+  if (10 == bd) {
+    AddPixels10bit(above, left, &sum_vector);
+    sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 4 values
+    sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 2 values
+
+    u = _mm_srli_si128(sum_vector, 2);
+    sum_vector = _mm_add_epi16(sum_vector, u);
+    sum_value = _mm_extract_epi16(sum_vector, 0);
+  } else if (12 == bd) {
+    AddPixels12bit(above, left, &sum_vector);
+
+    sum_vector = _mm_hadd_epi32(sum_vector, zero);
+    u = _mm_srli_si128(sum_vector, 4);
+    sum_vector = _mm_add_epi32(u, sum_vector);
+    sum_value = _mm_extract_epi32(sum_vector, 0);
+  }
+
+  sum_value += 16;
+  sum_value >>= 5;
+  *params = _mm_set1_epi32(sum_value);
+  return sum_value;
+}
+
+static INLINE int HighbdGetMeanValue32x32(const uint16_t *above,
+                                          const uint16_t *left, const int bd,
+                                          __m128i *params) {
+  const __m128i zero = _mm_setzero_si128();
+  __m128i sum_vector[2], u;
+  uint32_t sum_value = 0;
+
+  if (10 == bd) {
+    AddPixels10bit(above, left, &sum_vector[0]);
+    AddPixels10bit(above + 16, left + 16, &sum_vector[1]);
+
+    sum_vector[0] = _mm_add_epi16(sum_vector[0], sum_vector[1]);
+    sum_vector[0] = _mm_hadd_epi16(sum_vector[0], zero);  // still has 4 values
+    sum_vector[0] = _mm_hadd_epi16(sum_vector[0], zero);  // still has 2 values
+
+    u = _mm_srli_si128(sum_vector[0], 2);
+    sum_vector[0] = _mm_add_epi16(sum_vector[0], u);
+    sum_value = _mm_extract_epi16(sum_vector[0], 0);
+  } else if (12 == bd) {
+    AddPixels12bit(above, left, &sum_vector[0]);
+    AddPixels12bit(above + 16, left + 16, &sum_vector[1]);
+
+    sum_vector[0] = _mm_add_epi32(sum_vector[0], sum_vector[1]);
+    sum_vector[0] = _mm_hadd_epi32(sum_vector[0], zero);
+    u = _mm_srli_si128(sum_vector[0], 4);
+    sum_vector[0] = _mm_add_epi32(u, sum_vector[0]);
+    sum_value = _mm_extract_epi32(sum_vector[0], 0);
+  }
+
+  sum_value += 32;
+  sum_value >>= 6;
+  *params = _mm_set1_epi32(sum_value);
+  return sum_value;
+}
+
+// Note:
+//  params[4] holds the mean value, replicated across four int32_t lanes.
+//
+static INLINE int HighbdCalcRefPixelsMeanValue(const uint16_t *above,
+                                               const uint16_t *left, int bs,
+                                               const int bd, __m128i *params) {
+  int meanValue = 0;
+  switch (bs) {
+    case 4: meanValue = HighbdGetMeanValue4x4(above, left, bd, params); break;
+    case 8: meanValue = HighbdGetMeanValue8x8(above, left, bd, params); break;
+    case 16:
+      meanValue = HighbdGetMeanValue16x16(above, left, bd, params);
+      break;
+    case 32:
+      meanValue = HighbdGetMeanValue32x32(above, left, bd, params);
+      break;
+    default: assert(0);
+  }
+  return meanValue;
+}
+
+// Note:
+//  At row r, column c, the remaining pixels are R = 2 * bs + 1 - r - c;
+//  the number of pixels still to produce is R - 2 = 2 * bs - r - c - 1.
+static void HighbdGeneratePrediction(const uint16_t *above,
+                                     const uint16_t *left, const int bs,
+                                     const int bd, const __m128i *prm,
+                                     int meanValue, uint16_t *dst,
+                                     ptrdiff_t stride) {
+  int pred[33][65];
+  int r, c, colBound;
+  int remainings;
+  int ipred;
+
+  for (r = 0; r < bs; ++r) {
+    pred[r + 1][0] = (int)left[r] - meanValue;
+  }
+
+  above -= 1;
+  for (c = 0; c < 2 * bs + 1; ++c) {
+    pred[0][c] = (int)above[c] - meanValue;
+  }
+
+  r = 0;
+  while (r < bs) {
+    colBound = (bs << 1) - r;
+    for (c = 0; c < colBound; c += 4) {
+      remainings = colBound - c + 1;
+      ProducePixels(&pred[r][c], prm, remainings);
+    }
+    r += 1;
+  }
+
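+  // Unlike the 8-bit path, the clamp to [0, (1 << bd) - 1] cannot reuse the
+  // 8-bit pack instructions, so the mean is added back and clipped in C.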
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      ipred = pred[r + 1][c + 1] + meanValue;
+      dst[c] = clip_pixel_highbd(ipred, bd);
+    }
+    dst += stride;
+  }
+}
+
+static void HighbdFilterPrediction(const uint16_t *above, const uint16_t *left,
+                                   int bs, const int bd, __m128i *prm,
+                                   uint16_t *dst, ptrdiff_t stride) {
+  int meanValue = 0;
+  meanValue = HighbdCalcRefPixelsMeanValue(above, left, bs, bd, &prm[4]);
+  HighbdGeneratePrediction(above, left, bs, bd, prm, meanValue, dst, stride);
+}
+
+void vp10_highbd_dc_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                            int bs, const uint16_t *above,
+                                            const uint16_t *left, int bd) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, DC_PRED, &prm[0]);
+  HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
+}
+
+void vp10_highbd_v_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                           int bs, const uint16_t *above,
+                                           const uint16_t *left, int bd) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, V_PRED, &prm[0]);
+  HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
+}
+
+void vp10_highbd_h_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                           int bs, const uint16_t *above,
+                                           const uint16_t *left, int bd) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, H_PRED, &prm[0]);
+  HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
+}
+
+void vp10_highbd_d45_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                             int bs, const uint16_t *above,
+                                             const uint16_t *left, int bd) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D45_PRED, &prm[0]);
+  HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
+}
+
+void vp10_highbd_d135_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                              int bs, const uint16_t *above,
+                                              const uint16_t *left, int bd) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D135_PRED, &prm[0]);
+  HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
+}
+
+void vp10_highbd_d117_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                              int bs, const uint16_t *above,
+                                              const uint16_t *left, int bd) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D117_PRED, &prm[0]);
+  HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
+}
+
+void vp10_highbd_d153_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                              int bs, const uint16_t *above,
+                                              const uint16_t *left, int bd) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D153_PRED, &prm[0]);
+  HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
+}
+
+void vp10_highbd_d207_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                              int bs, const uint16_t *above,
+                                              const uint16_t *left, int bd) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D207_PRED, &prm[0]);
+  HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
+}
+
+void vp10_highbd_d63_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                             int bs, const uint16_t *above,
+                                             const uint16_t *left, int bd) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, D63_PRED, &prm[0]);
+  HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
+}
+
+void vp10_highbd_tm_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                            int bs, const uint16_t *above,
+                                            const uint16_t *left, int bd) {
+  __m128i prm[5];
+  GetIntraFilterParams(bs, TM_PRED, &prm[0]);
+  HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/x86/vp10_convolve_filters_ssse3.c b/av1/common/x86/vp10_convolve_filters_ssse3.c
new file mode 100644
index 0000000..b842589
--- /dev/null
+++ b/av1/common/x86/vp10_convolve_filters_ssse3.c
@@ -0,0 +1,660 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#include "av1/common/filter.h"
+
+#if CONFIG_EXT_INTERP
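+// Each of the 15 subpel phases stores the 10-tap kernel twice, the second
+// copy shifted right by two taps, so the horizontal SSSE3 convolve can
+// reuse one coefficient load for neighboring output pixels (layout inferred
+// from the table contents).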
+DECLARE_ALIGNED(16, const int8_t,
+                sub_pel_filters_10sharp_signal_dir[15][2][16]) = {
+  {
+      { 0, 0, -1, 3, -6, 127, 8, -4, 2, -1, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, -1, 3, -6, 127, 8, -4, 2, -1, 0, 0, 0, 0 },
+  },
+  {
+      { 0, 1, -2, 5, -12, 124, 18, -7, 3, -2, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -2, 5, -12, 124, 18, -7, 3, -2, 0, 0, 0, 0 },
+  },
+  {
+      { 0, 1, -3, 7, -17, 119, 28, -11, 5, -2, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -3, 7, -17, 119, 28, -11, 5, -2, 1, 0, 0, 0 },
+  },
+  {
+      { 0, 1, -4, 8, -20, 114, 38, -14, 7, -3, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -4, 8, -20, 114, 38, -14, 7, -3, 1, 0, 0, 0 },
+  },
+  {
+      { 0, 1, -4, 9, -22, 107, 49, -17, 8, -4, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -4, 9, -22, 107, 49, -17, 8, -4, 1, 0, 0, 0 },
+  },
+  {
+      { 0, 2, -5, 10, -24, 99, 59, -20, 9, -4, 2, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 2, -5, 10, -24, 99, 59, -20, 9, -4, 2, 0, 0, 0 },
+  },
+  {
+      { 0, 2, -5, 10, -24, 90, 70, -22, 10, -5, 2, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 2, -5, 10, -24, 90, 70, -22, 10, -5, 2, 0, 0, 0 },
+  },
+  {
+      { 0, 2, -5, 10, -23, 80, 80, -23, 10, -5, 2, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 2, -5, 10, -23, 80, 80, -23, 10, -5, 2, 0, 0, 0 },
+  },
+  {
+      { 0, 2, -5, 10, -22, 70, 90, -24, 10, -5, 2, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 2, -5, 10, -22, 70, 90, -24, 10, -5, 2, 0, 0, 0 },
+  },
+  {
+      { 0, 2, -4, 9, -20, 59, 99, -24, 10, -5, 2, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 2, -4, 9, -20, 59, 99, -24, 10, -5, 2, 0, 0, 0 },
+  },
+  {
+      { 0, 1, -4, 8, -17, 49, 107, -22, 9, -4, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -4, 8, -17, 49, 107, -22, 9, -4, 1, 0, 0, 0 },
+  },
+  {
+      { 0, 1, -3, 7, -14, 38, 114, -20, 8, -4, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -3, 7, -14, 38, 114, -20, 8, -4, 1, 0, 0, 0 },
+  },
+  {
+      { 0, 1, -2, 5, -11, 28, 119, -17, 7, -3, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -2, 5, -11, 28, 119, -17, 7, -3, 1, 0, 0, 0 },
+  },
+  {
+      { 0, 0, -2, 3, -7, 18, 124, -12, 5, -2, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, -2, 3, -7, 18, 124, -12, 5, -2, 1, 0, 0, 0 },
+  },
+  {
+      { 0, 0, -1, 2, -4, 8, 127, -6, 3, -1, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, -1, 2, -4, 8, 127, -6, 3, -1, 0, 0, 0, 0 },
+  },
+};
+#endif
+#if CONFIG_EXT_INTERP
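+// Vertical layout: six rows per phase, each holding one adjacent pair of
+// taps splatted across the eight 16-bit lanes, the natural operand shape
+// for multiply-adds over two interleaved source rows.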
+DECLARE_ALIGNED(16, const int8_t,
+                sub_pel_filters_10sharp_ver_signal_dir[15][6][16]) = {
+  {
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6,
+        127 },
+      { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5 },
+      { -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124,
+        -12, 124 },
+      { 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7 },
+      { 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7 },
+      { -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119,
+        -17, 119 },
+      { 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28,
+        -11 },
+      { 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+      { -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114,
+        -20, 114 },
+      { 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38,
+        -14 },
+      { 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9 },
+      { -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107,
+        -22, 107 },
+      { 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49,
+        -17 },
+      { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 },
+      { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 },
+      { -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24,
+        99 },
+      { 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59,
+        -20 },
+      { 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4 },
+      { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 },
+  },
+  {
+      { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 },
+      { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 },
+      { -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24,
+        90 },
+      { 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70,
+        -22 },
+      { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 },
+      { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 },
+  },
+  {
+      { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 },
+      { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 },
+      { -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23,
+        80 },
+      { 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80,
+        -23 },
+      { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 },
+      { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 },
+  },
+  {
+      { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 },
+      { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 },
+      { -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22,
+        70 },
+      { 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90,
+        -24 },
+      { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 },
+      { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 },
+  },
+  {
+      { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 },
+      { -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9 },
+      { -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20,
+        59 },
+      { 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99,
+        -24 },
+      { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 },
+      { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+      { -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17,
+        49 },
+      { 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22,
+        107, -22 },
+      { 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7 },
+      { -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14,
+        38 },
+      { 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20,
+        114, -20 },
+      { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5 },
+      { -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11,
+        28 },
+      { 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17,
+        119, -17 },
+      { 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3 },
+      { -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18 },
+      { 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12,
+        124, -12 },
+      { 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+      { 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127,
+        -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+  },
+};
+#endif
+#if CONFIG_EXT_INTERP
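+// Same shifted two-copy layout as the 10-tap table above, here for the
+// 12-tap sharp filter.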
+DECLARE_ALIGNED(16, const int8_t,
+                sub_pel_filters_12sharp_signal_dir[15][2][16]) = {
+  {
+      { 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0, 0, 0 },
+  },
+  {
+      { -1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1, 0, 0 },
+  },
+  {
+      { -1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1, 0, 0 },
+  },
+  {
+      { -1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1, 0, 0 },
+  },
+  {
+      { -2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2, 0, 0, 0, 0 },
+      { 0, 0, -2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2, 0, 0 },
+  },
+  {
+      { -2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2, 0, 0, 0, 0 },
+      { 0, 0, -2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2, 0, 0 },
+  },
+  {
+      { -2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2, 0, 0, 0, 0 },
+      { 0, 0, -2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2, 0, 0 },
+  },
+  {
+      { -2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2, 0, 0, 0, 0 },
+      { 0, 0, -2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2, 0, 0 },
+  },
+  {
+      { -2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2, 0, 0, 0, 0 },
+      { 0, 0, -2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2, 0, 0 },
+  },
+  {
+      { -2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2, 0, 0, 0, 0 },
+      { 0, 0, -2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2, 0, 0 },
+  },
+  {
+      { -2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2, 0, 0, 0, 0 },
+      { 0, 0, -2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2, 0, 0 },
+  },
+  {
+      { -1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1, 0, 0 },
+  },
+  {
+      { -1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1, 0, 0 },
+  },
+  {
+      { -1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1, 0, 0 },
+  },
+  {
+      { 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0, 0, 0 },
+  },
+};
+#endif
+#if CONFIG_EXT_INTERP
+DECLARE_ALIGNED(16, const int8_t,
+                sub_pel_filters_12sharp_ver_signal_dir[15][6][16]) = {
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3 },
+      { -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7,
+        127 },
+      { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6 },
+      { -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124,
+        -13, 124 },
+      { 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8 },
+      { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+      { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+      { -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120,
+        -18, 120 },
+      { 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28,
+        -12 },
+      { 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10 },
+      { -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115,
+        -21, 115 },
+      { 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38,
+        -15 },
+      { 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5 },
+      { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 },
+      { -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108,
+        -24, 108 },
+      { 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49,
+        -18 },
+      { 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6 },
+      { 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 },
+      { -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100,
+        -25, 100 },
+      { 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60,
+        -21 },
+      { 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7 },
+      { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 },
+      { -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26,
+        91 },
+      { 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71,
+        -24 },
+      { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 },
+      { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 },
+      { -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25,
+        81 },
+      { 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81,
+        -25 },
+      { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 },
+      { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 },
+      { -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24,
+        71 },
+      { 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91,
+        -26 },
+      { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 },
+      { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11 },
+      { -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21,
+        60 },
+      { 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25,
+        100, -25 },
+      { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 },
+      { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3 },
+      { -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10 },
+      { -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18,
+        49 },
+      { 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24,
+        108, -24 },
+      { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 },
+      { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8 },
+      { -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15,
+        38 },
+      { 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21,
+        115, -21 },
+      { 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7 },
+      { -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12,
+        28 },
+      { 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18,
+        120, -18 },
+      { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18 },
+      { 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13,
+        124, -13 },
+      { 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3 },
+      { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+      { 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127,
+        -7 },
+      { 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+};
+#endif
+#if USE_TEMPORALFILTER_12TAP
+DECLARE_ALIGNED(16, const int8_t,
+                sub_pel_filters_temporalfilter_12_signal_dir[15][2][16]) = {
+  {
+      { 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0, 0, 0 },
+  },
+  {
+      { 0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0, 0, 0 },
+  },
+  {
+      { -1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1, 0, 0 },
+  },
+  {
+      { -1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1, 0, 0 },
+  },
+  {
+      { -1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1, 0, 0 },
+  },
+  {
+      { -1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1, 0, 0 },
+  },
+  {
+      { -1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1, 0, 0 },
+  },
+  {
+      { -1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1, 0, 0 },
+  },
+  {
+      { -1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1, 0, 0 },
+  },
+  {
+      { -1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1, 0, 0 },
+  },
+  {
+      { -1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1, 0, 0 },
+  },
+  {
+      { -1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1, 0, 0 },
+  },
+  {
+      { -1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1, 0, 0, 0, 0 },
+      { 0, 0, -1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1, 0, 0 },
+  },
+  {
+      { 0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0, 0, 0 },
+  },
+  {
+      { 0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0, 0, 0 },
+  },
+};
+#endif
+#if USE_TEMPORALFILTER_12TAP
+DECLARE_ALIGNED(16, const int8_t,
+                sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16]) = {
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7,
+        127 },
+      { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5 },
+      { -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124,
+        -12, 124 },
+      { 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8 },
+      { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+      { -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120,
+        -17, 120 },
+      { 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28,
+        -11 },
+      { 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3 },
+      { 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10 },
+      { -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114,
+        -21, 114 },
+      { 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38,
+        -15 },
+      { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11 },
+      { -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107,
+        -23, 107 },
+      { 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49,
+        -18 },
+      { 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5 },
+      { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 },
+      { -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25,
+        99 },
+      { 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60,
+        -21 },
+      { 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 },
+      { -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25,
+        90 },
+      { 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70,
+        -23 },
+      { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 },
+      { -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24,
+        80 },
+      { 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80,
+        -24 },
+      { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 },
+      { -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23,
+        70 },
+      { 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90,
+        -25 },
+      { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11 },
+      { -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21,
+        60 },
+      { 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99,
+        -25 },
+      { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9 },
+      { -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18,
+        49 },
+      { 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23,
+        107, -23 },
+      { 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5 },
+      { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+      { -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15,
+        38 },
+      { 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21,
+        114, -21 },
+      { 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1 },
+      { -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6 },
+      { -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11,
+        28 },
+      { 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17,
+        120, -17 },
+      { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18 },
+      { 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12,
+        124, -12 },
+      { 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+      { 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127,
+        -7 },
+      { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+      { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+};
+#endif
diff --git a/av1/common/x86/vp10_convolve_ssse3.c b/av1/common/x86/vp10_convolve_ssse3.c
new file mode 100644
index 0000000..e891d74
--- /dev/null
+++ b/av1/common/x86/vp10_convolve_ssse3.c
@@ -0,0 +1,878 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <tmmintrin.h>
+
+#include "./vp10_rtcd.h"
+#include "av1/common/filter.h"
+
+#define WIDTH_BOUND (16)
+#define HEIGHT_BOUND (16)
+
+static INLINE void transpose_4x8(const __m128i *in, __m128i *out) {
+  __m128i t0, t1;
+
+  t0 = _mm_unpacklo_epi16(in[0], in[1]);
+  t1 = _mm_unpacklo_epi16(in[2], in[3]);
+
+  out[0] = _mm_unpacklo_epi32(t0, t1);
+  out[1] = _mm_srli_si128(out[0], 8);
+  out[2] = _mm_unpackhi_epi32(t0, t1);
+  out[3] = _mm_srli_si128(out[2], 8);
+
+  t0 = _mm_unpackhi_epi16(in[0], in[1]);
+  t1 = _mm_unpackhi_epi16(in[2], in[3]);
+
+  out[4] = _mm_unpacklo_epi32(t0, t1);
+  out[5] = _mm_srli_si128(out[4], 8);
+  // Note: We ignore out[6] and out[7] because
+  // they're zero vectors.
+}
+
+typedef void (*store_pixel_t)(const __m128i *x, uint8_t *dst);
+
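+// Rounding average of the new filtered value *x with the pixels already at
+// src: per pixel, (x + src + 1) >> 1. This is the accumulate/avg store path.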
+static INLINE __m128i accumulate_store(const __m128i *x, uint8_t *src) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i one = _mm_set1_epi16(1);
+  __m128i y = _mm_loadl_epi64((__m128i const *)src);
+  y = _mm_unpacklo_epi8(y, zero);
+  y = _mm_add_epi16(*x, y);
+  y = _mm_add_epi16(y, one);
+  y = _mm_srai_epi16(y, 1);
+  y = _mm_packus_epi16(y, y);
+  return y;
+}
+
+static INLINE void store_2_pixel_only(const __m128i *x, uint8_t *dst) {
+  uint32_t temp;
+  __m128i u = _mm_packus_epi16(*x, *x);
+  temp = _mm_cvtsi128_si32(u);
+  *(uint16_t *)dst = (uint16_t)temp;
+}
+
+static INLINE void accumulate_store_2_pixel(const __m128i *x, uint8_t *dst) {
+  uint32_t temp;
+  __m128i y = accumulate_store(x, dst);
+  temp = _mm_cvtsi128_si32(y);
+  *(uint16_t *)dst = (uint16_t)temp;
+}
+
+static store_pixel_t store2pixelTab[2] = { store_2_pixel_only,
+                                           accumulate_store_2_pixel };
+
+static INLINE void store_4_pixel_only(const __m128i *x, uint8_t *dst) {
+  __m128i u = _mm_packus_epi16(*x, *x);
+  *(int *)dst = _mm_cvtsi128_si32(u);
+}
+
+static INLINE void accumulate_store_4_pixel(const __m128i *x, uint8_t *dst) {
+  __m128i y = accumulate_store(x, dst);
+  *(int *)dst = _mm_cvtsi128_si32(y);
+}
+
+static store_pixel_t store4pixelTab[2] = { store_4_pixel_only,
+                                           accumulate_store_4_pixel };
+
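+// Filters 4 output pixels of one row. Two unaligned loads feed
+// _mm_maddubs_epi16 against the two halves of the 10/12-tap filter, and the
+// 4x8 transpose regroups the pair-sums per output pixel. Taking min/max of
+// the two middle partial sums before the saturating adds keeps the largest
+// terms from overflowing, and _mm_mulhrs_epi16 with 1 << 8 performs the
+// final (sum + 64) >> 7 rounding.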
+static void horiz_w4_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+                           store_pixel_t store_func, uint8_t *dst) {
+  __m128i sumPairRow[4];
+  __m128i sumPairCol[8];
+  __m128i pixel;
+  const __m128i k_256 = _mm_set1_epi16(1 << 8);
+  const __m128i zero = _mm_setzero_si128();
+
+  if (10 == tapsNum) {
+    src -= 1;
+  }
+
+  pixel = _mm_loadu_si128((__m128i const *)src);
+  sumPairRow[0] = _mm_maddubs_epi16(pixel, f[0]);
+  sumPairRow[2] = _mm_maddubs_epi16(pixel, f[1]);
+  sumPairRow[2] = _mm_srli_si128(sumPairRow[2], 2);
+
+  pixel = _mm_loadu_si128((__m128i const *)(src + 1));
+  sumPairRow[1] = _mm_maddubs_epi16(pixel, f[0]);
+  sumPairRow[3] = _mm_maddubs_epi16(pixel, f[1]);
+  sumPairRow[3] = _mm_srli_si128(sumPairRow[3], 2);
+
+  transpose_4x8(sumPairRow, sumPairCol);
+
+  sumPairRow[0] = _mm_adds_epi16(sumPairCol[0], sumPairCol[1]);
+  sumPairRow[1] = _mm_adds_epi16(sumPairCol[4], sumPairCol[5]);
+
+  sumPairRow[2] = _mm_min_epi16(sumPairCol[2], sumPairCol[3]);
+  sumPairRow[3] = _mm_max_epi16(sumPairCol[2], sumPairCol[3]);
+
+  sumPairRow[0] = _mm_adds_epi16(sumPairRow[0], sumPairRow[1]);
+  sumPairRow[0] = _mm_adds_epi16(sumPairRow[0], sumPairRow[2]);
+  sumPairRow[0] = _mm_adds_epi16(sumPairRow[0], sumPairRow[3]);
+
+  sumPairRow[1] = _mm_mulhrs_epi16(sumPairRow[0], k_256);
+  sumPairRow[1] = _mm_packus_epi16(sumPairRow[1], sumPairRow[1]);
+  sumPairRow[1] = _mm_unpacklo_epi8(sumPairRow[1], zero);
+
+  store_func(&sumPairRow[1], dst);
+}
+
+static void horiz_w8_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+                           store_pixel_t store, uint8_t *buf) {
+  horiz_w4_ssse3(src, f, tapsNum, store, buf);
+  src += 4;
+  buf += 4;
+  horiz_w4_ssse3(src, f, tapsNum, store, buf);
+}
+
+static void horiz_w16_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+                            store_pixel_t store, uint8_t *buf) {
+  horiz_w8_ssse3(src, f, tapsNum, store, buf);
+  src += 8;
+  buf += 8;
+  horiz_w8_ssse3(src, f, tapsNum, store, buf);
+}
+
+static void horiz_w32_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+                            store_pixel_t store, uint8_t *buf) {
+  horiz_w16_ssse3(src, f, tapsNum, store, buf);
+  src += 16;
+  buf += 16;
+  horiz_w16_ssse3(src, f, tapsNum, store, buf);
+}
+
+static void horiz_w64_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+                            store_pixel_t store, uint8_t *buf) {
+  horiz_w32_ssse3(src, f, tapsNum, store, buf);
+  src += 32;
+  buf += 32;
+  horiz_w32_ssse3(src, f, tapsNum, store, buf);
+}
+
+static void horiz_w128_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+                             store_pixel_t store, uint8_t *buf) {
+  horiz_w64_ssse3(src, f, tapsNum, store, buf);
+  src += 64;
+  buf += 64;
+  horiz_w64_ssse3(src, f, tapsNum, store, buf);
+}
+
+static void (*horizTab[6])(const uint8_t *, const __m128i *, int, store_pixel_t,
+                           uint8_t *) = {
+  horiz_w4_ssse3,  horiz_w8_ssse3,  horiz_w16_ssse3,
+  horiz_w32_ssse3, horiz_w64_ssse3, horiz_w128_ssse3,
+};
+
+static void filter_horiz_ssse3(const uint8_t *src, __m128i *f, int tapsNum,
+                               int width, store_pixel_t store, uint8_t *dst) {
+  switch (width) {
+    // Note:
+    // For width 2 and width 4 the caller must pass different store functions
+    case 2:
+    case 4: horizTab[0](src, f, tapsNum, store, dst); break;
+    case 8: horizTab[1](src, f, tapsNum, store, dst); break;
+    case 16: horizTab[2](src, f, tapsNum, store, dst); break;
+    case 32: horizTab[3](src, f, tapsNum, store, dst); break;
+    case 64: horizTab[4](src, f, tapsNum, store, dst); break;
+    case 128: horizTab[5](src, f, tapsNum, store, dst); break;
+    default: assert(0);
+  }
+}
+
+// Vertical 8-pixel parallel
+typedef void (*transpose_to_dst_t)(const uint16_t *src, int src_stride,
+                                   uint8_t *dst, int dst_stride);
+
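+// Rounds the 16-bit intermediate results ((x + 64) >> 7 via
+// _mm_mulhrs_epi16), packs them to 8 bits, transposes the 8x8 tile back to
+// row order and stores one 8-pixel row per destination line.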
+static INLINE void transpose8x8_direct_to_dst(const uint16_t *src,
+                                              int src_stride, uint8_t *dst,
+                                              int dst_stride) {
+  const __m128i k_256 = _mm_set1_epi16(1 << 8);
+  __m128i v0, v1, v2, v3;
+
+  __m128i u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
+  __m128i u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
+  __m128i u2 = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
+  __m128i u3 = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
+  __m128i u4 = _mm_loadu_si128((__m128i const *)(src + 4 * src_stride));
+  __m128i u5 = _mm_loadu_si128((__m128i const *)(src + 5 * src_stride));
+  __m128i u6 = _mm_loadu_si128((__m128i const *)(src + 6 * src_stride));
+  __m128i u7 = _mm_loadu_si128((__m128i const *)(src + 7 * src_stride));
+
+  u0 = _mm_mulhrs_epi16(u0, k_256);
+  u1 = _mm_mulhrs_epi16(u1, k_256);
+  u2 = _mm_mulhrs_epi16(u2, k_256);
+  u3 = _mm_mulhrs_epi16(u3, k_256);
+  u4 = _mm_mulhrs_epi16(u4, k_256);
+  u5 = _mm_mulhrs_epi16(u5, k_256);
+  u6 = _mm_mulhrs_epi16(u6, k_256);
+  u7 = _mm_mulhrs_epi16(u7, k_256);
+
+  v0 = _mm_packus_epi16(u0, u1);
+  v1 = _mm_packus_epi16(u2, u3);
+  v2 = _mm_packus_epi16(u4, u5);
+  v3 = _mm_packus_epi16(u6, u7);
+
+  u0 = _mm_unpacklo_epi8(v0, v1);
+  u1 = _mm_unpackhi_epi8(v0, v1);
+  u2 = _mm_unpacklo_epi8(v2, v3);
+  u3 = _mm_unpackhi_epi8(v2, v3);
+
+  u4 = _mm_unpacklo_epi8(u0, u1);
+  u5 = _mm_unpacklo_epi8(u2, u3);
+  u6 = _mm_unpackhi_epi8(u0, u1);
+  u7 = _mm_unpackhi_epi8(u2, u3);
+
+  u0 = _mm_unpacklo_epi32(u4, u5);
+  u1 = _mm_unpackhi_epi32(u4, u5);
+  u2 = _mm_unpacklo_epi32(u6, u7);
+  u3 = _mm_unpackhi_epi32(u6, u7);
+
+  u4 = _mm_srli_si128(u0, 8);
+  u5 = _mm_srli_si128(u1, 8);
+  u6 = _mm_srli_si128(u2, 8);
+  u7 = _mm_srli_si128(u3, 8);
+
+  _mm_storel_epi64((__m128i *)dst, u0);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 1), u4);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 2), u1);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 3), u5);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 4), u2);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 5), u6);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 6), u3);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 7), u7);
+}
+
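+// As transpose8x8_direct_to_dst, but averages with the pixels already in
+// dst: each output becomes (new + old + 1) >> 1.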
+static INLINE void transpose8x8_accumu_to_dst(const uint16_t *src,
+                                              int src_stride, uint8_t *dst,
+                                              int dst_stride) {
+  const __m128i k_256 = _mm_set1_epi16(1 << 8);
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i one = _mm_set1_epi16(1);
+  __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+
+  __m128i u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
+  __m128i u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
+  __m128i u2 = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
+  __m128i u3 = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
+  __m128i u4 = _mm_loadu_si128((__m128i const *)(src + 4 * src_stride));
+  __m128i u5 = _mm_loadu_si128((__m128i const *)(src + 5 * src_stride));
+  __m128i u6 = _mm_loadu_si128((__m128i const *)(src + 6 * src_stride));
+  __m128i u7 = _mm_loadu_si128((__m128i const *)(src + 7 * src_stride));
+
+  u0 = _mm_mulhrs_epi16(u0, k_256);
+  u1 = _mm_mulhrs_epi16(u1, k_256);
+  u2 = _mm_mulhrs_epi16(u2, k_256);
+  u3 = _mm_mulhrs_epi16(u3, k_256);
+  u4 = _mm_mulhrs_epi16(u4, k_256);
+  u5 = _mm_mulhrs_epi16(u5, k_256);
+  u6 = _mm_mulhrs_epi16(u6, k_256);
+  u7 = _mm_mulhrs_epi16(u7, k_256);
+
+  v0 = _mm_packus_epi16(u0, u1);
+  v1 = _mm_packus_epi16(u2, u3);
+  v2 = _mm_packus_epi16(u4, u5);
+  v3 = _mm_packus_epi16(u6, u7);
+
+  u0 = _mm_unpacklo_epi8(v0, v1);
+  u1 = _mm_unpackhi_epi8(v0, v1);
+  u2 = _mm_unpacklo_epi8(v2, v3);
+  u3 = _mm_unpackhi_epi8(v2, v3);
+
+  u4 = _mm_unpacklo_epi8(u0, u1);
+  u5 = _mm_unpacklo_epi8(u2, u3);
+  u6 = _mm_unpackhi_epi8(u0, u1);
+  u7 = _mm_unpackhi_epi8(u2, u3);
+
+  u0 = _mm_unpacklo_epi32(u4, u5);
+  u1 = _mm_unpackhi_epi32(u4, u5);
+  u2 = _mm_unpacklo_epi32(u6, u7);
+  u3 = _mm_unpackhi_epi32(u6, u7);
+
+  u4 = _mm_srli_si128(u0, 8);
+  u5 = _mm_srli_si128(u1, 8);
+  u6 = _mm_srli_si128(u2, 8);
+  u7 = _mm_srli_si128(u3, 8);
+
+  v0 = _mm_loadl_epi64((__m128i const *)(dst + 0 * dst_stride));
+  v1 = _mm_loadl_epi64((__m128i const *)(dst + 1 * dst_stride));
+  v2 = _mm_loadl_epi64((__m128i const *)(dst + 2 * dst_stride));
+  v3 = _mm_loadl_epi64((__m128i const *)(dst + 3 * dst_stride));
+  v4 = _mm_loadl_epi64((__m128i const *)(dst + 4 * dst_stride));
+  v5 = _mm_loadl_epi64((__m128i const *)(dst + 5 * dst_stride));
+  v6 = _mm_loadl_epi64((__m128i const *)(dst + 6 * dst_stride));
+  v7 = _mm_loadl_epi64((__m128i const *)(dst + 7 * dst_stride));
+
+  u0 = _mm_unpacklo_epi8(u0, zero);
+  u1 = _mm_unpacklo_epi8(u1, zero);
+  u2 = _mm_unpacklo_epi8(u2, zero);
+  u3 = _mm_unpacklo_epi8(u3, zero);
+  u4 = _mm_unpacklo_epi8(u4, zero);
+  u5 = _mm_unpacklo_epi8(u5, zero);
+  u6 = _mm_unpacklo_epi8(u6, zero);
+  u7 = _mm_unpacklo_epi8(u7, zero);
+
+  v0 = _mm_unpacklo_epi8(v0, zero);
+  v1 = _mm_unpacklo_epi8(v1, zero);
+  v2 = _mm_unpacklo_epi8(v2, zero);
+  v3 = _mm_unpacklo_epi8(v3, zero);
+  v4 = _mm_unpacklo_epi8(v4, zero);
+  v5 = _mm_unpacklo_epi8(v5, zero);
+  v6 = _mm_unpacklo_epi8(v6, zero);
+  v7 = _mm_unpacklo_epi8(v7, zero);
+
+  v0 = _mm_adds_epi16(u0, v0);
+  v1 = _mm_adds_epi16(u4, v1);
+  v2 = _mm_adds_epi16(u1, v2);
+  v3 = _mm_adds_epi16(u5, v3);
+  v4 = _mm_adds_epi16(u2, v4);
+  v5 = _mm_adds_epi16(u6, v5);
+  v6 = _mm_adds_epi16(u3, v6);
+  v7 = _mm_adds_epi16(u7, v7);
+
+  v0 = _mm_adds_epi16(v0, one);
+  v1 = _mm_adds_epi16(v1, one);
+  v2 = _mm_adds_epi16(v2, one);
+  v3 = _mm_adds_epi16(v3, one);
+  v4 = _mm_adds_epi16(v4, one);
+  v5 = _mm_adds_epi16(v5, one);
+  v6 = _mm_adds_epi16(v6, one);
+  v7 = _mm_adds_epi16(v7, one);
+
+  v0 = _mm_srai_epi16(v0, 1);
+  v1 = _mm_srai_epi16(v1, 1);
+  v2 = _mm_srai_epi16(v2, 1);
+  v3 = _mm_srai_epi16(v3, 1);
+  v4 = _mm_srai_epi16(v4, 1);
+  v5 = _mm_srai_epi16(v5, 1);
+  v6 = _mm_srai_epi16(v6, 1);
+  v7 = _mm_srai_epi16(v7, 1);
+
+  u0 = _mm_packus_epi16(v0, v1);
+  u1 = _mm_packus_epi16(v2, v3);
+  u2 = _mm_packus_epi16(v4, v5);
+  u3 = _mm_packus_epi16(v6, v7);
+
+  u4 = _mm_srli_si128(u0, 8);
+  u5 = _mm_srli_si128(u1, 8);
+  u6 = _mm_srli_si128(u2, 8);
+  u7 = _mm_srli_si128(u3, 8);
+
+  _mm_storel_epi64((__m128i *)dst, u0);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 1), u4);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 2), u1);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 3), u5);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 4), u2);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 5), u6);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 6), u3);
+  _mm_storel_epi64((__m128i *)(dst + dst_stride * 7), u7);
+}
+
+static transpose_to_dst_t trans8x8Tab[2] = { transpose8x8_direct_to_dst,
+                                             transpose8x8_accumu_to_dst };
+
+static INLINE void transpose_8x16(const __m128i *in, __m128i *out) {
+  __m128i t0, t1, t2, t3, u0, u1;
+
+  t0 = _mm_unpacklo_epi16(in[0], in[1]);
+  t1 = _mm_unpacklo_epi16(in[2], in[3]);
+  t2 = _mm_unpacklo_epi16(in[4], in[5]);
+  t3 = _mm_unpacklo_epi16(in[6], in[7]);
+
+  u0 = _mm_unpacklo_epi32(t0, t1);
+  u1 = _mm_unpacklo_epi32(t2, t3);
+
+  out[0] = _mm_unpacklo_epi64(u0, u1);
+  out[1] = _mm_unpackhi_epi64(u0, u1);
+
+  u0 = _mm_unpackhi_epi32(t0, t1);
+  u1 = _mm_unpackhi_epi32(t2, t3);
+
+  out[2] = _mm_unpacklo_epi64(u0, u1);
+  out[3] = _mm_unpackhi_epi64(u0, u1);
+
+  t0 = _mm_unpackhi_epi16(in[0], in[1]);
+  t1 = _mm_unpackhi_epi16(in[2], in[3]);
+  t2 = _mm_unpackhi_epi16(in[4], in[5]);
+  t3 = _mm_unpackhi_epi16(in[6], in[7]);
+
+  u0 = _mm_unpacklo_epi32(t0, t1);
+  u1 = _mm_unpacklo_epi32(t2, t3);
+
+  out[4] = _mm_unpacklo_epi64(u0, u1);
+  out[5] = _mm_unpackhi_epi64(u0, u1);
+
+  // Ignore out[6] and out[7]; they're zero vectors.
+}
+
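+// Computes one horizontal output position for 8 consecutive rows at once.
+// The 8x16 transpose turns the 8 source rows into vectors of column pixel
+// pairs, so one _mm_maddubs_epi16 pass yields 8 un-rounded 16-bit sums;
+// the caller rounds and transposes them back via trans8x8Tab.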
+static void filter_horiz_v8p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch,
+                                   __m128i *f, int tapsNum, uint16_t *buf) {
+  __m128i s[8], t[6];
+  __m128i min_x2x3, max_x2x3;
+  __m128i temp;
+
+  if (tapsNum == 10) {
+    src_ptr -= 1;
+  }
+  s[0] = _mm_loadu_si128((const __m128i *)src_ptr);
+  s[1] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch));
+  s[2] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2));
+  s[3] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3));
+  s[4] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4));
+  s[5] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5));
+  s[6] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6));
+  s[7] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7));
+
+  // TRANSPOSE...
+  // Each vector now represents column pixel pairs instead of a row
+  transpose_8x16(s, t);
+
+  // multiply 2 adjacent elements with the filter and add the result
+  s[0] = _mm_maddubs_epi16(t[0], f[0]);
+  s[1] = _mm_maddubs_epi16(t[1], f[1]);
+  s[2] = _mm_maddubs_epi16(t[2], f[2]);
+  s[3] = _mm_maddubs_epi16(t[3], f[3]);
+  s[4] = _mm_maddubs_epi16(t[4], f[4]);
+  s[5] = _mm_maddubs_epi16(t[5], f[5]);
+
+  // add and saturate the results together
+  min_x2x3 = _mm_min_epi16(s[2], s[3]);
+  max_x2x3 = _mm_max_epi16(s[2], s[3]);
+  temp = _mm_adds_epi16(s[0], s[1]);
+  temp = _mm_adds_epi16(temp, s[5]);
+  temp = _mm_adds_epi16(temp, s[4]);
+
+  temp = _mm_adds_epi16(temp, min_x2x3);
+  temp = _mm_adds_epi16(temp, max_x2x3);
+
+  _mm_storeu_si128((__m128i *)buf, temp);
+}
+
+// Vertical 4-pixel parallel
+static INLINE void transpose4x4_direct_to_dst(const uint16_t *src,
+                                              int src_stride, uint8_t *dst,
+                                              int dst_stride) {
+  const __m128i k_256 = _mm_set1_epi16(1 << 8);
+  __m128i v0, v1, v2, v3;
+
+  // TODO(luoyi): two loads, 8 elements per load (two bytes per element)
+  __m128i u0 = _mm_loadl_epi64((__m128i const *)(src + 0 * src_stride));
+  __m128i u1 = _mm_loadl_epi64((__m128i const *)(src + 1 * src_stride));
+  __m128i u2 = _mm_loadl_epi64((__m128i const *)(src + 2 * src_stride));
+  __m128i u3 = _mm_loadl_epi64((__m128i const *)(src + 3 * src_stride));
+
+  v0 = _mm_unpacklo_epi16(u0, u1);
+  v1 = _mm_unpacklo_epi16(u2, u3);
+
+  v2 = _mm_unpacklo_epi32(v0, v1);
+  v3 = _mm_unpackhi_epi32(v0, v1);
+
+  u0 = _mm_mulhrs_epi16(v2, k_256);
+  u1 = _mm_mulhrs_epi16(v3, k_256);
+
+  u0 = _mm_packus_epi16(u0, u1);
+  u1 = _mm_srli_si128(u0, 4);
+  u2 = _mm_srli_si128(u0, 8);
+  u3 = _mm_srli_si128(u0, 12);
+
+  *(int *)(dst) = _mm_cvtsi128_si32(u0);
+  *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u1);
+  *(int *)(dst + dst_stride * 2) = _mm_cvtsi128_si32(u2);
+  *(int *)(dst + dst_stride * 3) = _mm_cvtsi128_si32(u3);
+}
+
+static INLINE void transpose4x4_accumu_to_dst(const uint16_t *src,
+                                              int src_stride, uint8_t *dst,
+                                              int dst_stride) {
+  const __m128i k_256 = _mm_set1_epi16(1 << 8);
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i one = _mm_set1_epi16(1);
+
+  __m128i v0, v1, v2, v3;
+
+  __m128i u0 = _mm_loadl_epi64((__m128i const *)(src));
+  __m128i u1 = _mm_loadl_epi64((__m128i const *)(src + src_stride));
+  __m128i u2 = _mm_loadl_epi64((__m128i const *)(src + 2 * src_stride));
+  __m128i u3 = _mm_loadl_epi64((__m128i const *)(src + 3 * src_stride));
+
+  v0 = _mm_unpacklo_epi16(u0, u1);
+  v1 = _mm_unpacklo_epi16(u2, u3);
+
+  v2 = _mm_unpacklo_epi32(v0, v1);
+  v3 = _mm_unpackhi_epi32(v0, v1);
+
+  u0 = _mm_mulhrs_epi16(v2, k_256);
+  u1 = _mm_mulhrs_epi16(v3, k_256);
+
+  u2 = _mm_packus_epi16(u0, u1);
+  u0 = _mm_unpacklo_epi8(u2, zero);
+  u1 = _mm_unpackhi_epi8(u2, zero);
+
+  // load pixel values
+  v0 = _mm_loadl_epi64((__m128i const *)(dst));
+  v1 = _mm_loadl_epi64((__m128i const *)(dst + dst_stride));
+  v2 = _mm_loadl_epi64((__m128i const *)(dst + 2 * dst_stride));
+  v3 = _mm_loadl_epi64((__m128i const *)(dst + 3 * dst_stride));
+
+  v0 = _mm_unpacklo_epi8(v0, zero);
+  v1 = _mm_unpacklo_epi8(v1, zero);
+  v2 = _mm_unpacklo_epi8(v2, zero);
+  v3 = _mm_unpacklo_epi8(v3, zero);
+
+  v0 = _mm_unpacklo_epi64(v0, v1);
+  v1 = _mm_unpacklo_epi64(v2, v3);
+
+  u0 = _mm_adds_epi16(u0, v0);
+  u1 = _mm_adds_epi16(u1, v1);
+
+  u0 = _mm_adds_epi16(u0, one);
+  u1 = _mm_adds_epi16(u1, one);
+
+  u0 = _mm_srai_epi16(u0, 1);
+  u1 = _mm_srai_epi16(u1, 1);
+
+  // saturation and pack to pixels
+  u0 = _mm_packus_epi16(u0, u1);
+  u1 = _mm_srli_si128(u0, 4);
+  u2 = _mm_srli_si128(u0, 8);
+  u3 = _mm_srli_si128(u0, 12);
+
+  *(int *)(dst) = _mm_cvtsi128_si32(u0);
+  *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u1);
+  *(int *)(dst + dst_stride * 2) = _mm_cvtsi128_si32(u2);
+  *(int *)(dst + dst_stride * 3) = _mm_cvtsi128_si32(u3);
+}
+
+static transpose_to_dst_t trans4x4Tab[2] = { transpose4x4_direct_to_dst,
+                                             transpose4x4_accumu_to_dst };
+
+static void filter_horiz_v4p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch,
+                                   __m128i *f, int tapsNum, uint16_t *buf) {
+  __m128i A, B, C, D;
+  __m128i tr0_0, tr0_1, s1s0, s3s2, s5s4, s7s6, s9s8, sbsa;
+  __m128i x0, x1, x2, x3, x4, x5;
+  __m128i min_x2x3, max_x2x3, temp;
+
+  if (tapsNum == 10) {
+    src_ptr -= 1;
+  }
+  A = _mm_loadu_si128((const __m128i *)src_ptr);
+  B = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch));
+  C = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2));
+  D = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3));
+
+  // TRANSPOSE...
+  // Each vector now represents column pixel pairs instead of a row
+  // 00 01 10 11 02 03 12 13 04 05 14 15 06 07 16 17
+  tr0_0 = _mm_unpacklo_epi16(A, B);
+  // 20 21 30 31 22 23 32 33 24 25 34 35 26 27 36 37
+  tr0_1 = _mm_unpacklo_epi16(C, D);
+  // 00 01 10 11 20 21 30 31 02 03 12 13 22 23 32 33
+  s1s0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+  // 04 05 14 15 24 25 34 35 06 07 16 17 26 27 36 37
+  s5s4 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+  // 02 03 12 13 22 23 32 33
+  s3s2 = _mm_srli_si128(s1s0, 8);
+  // 06 07 16 17 26 27 36 37
+  s7s6 = _mm_srli_si128(s5s4, 8);
+
+  tr0_0 = _mm_unpackhi_epi16(A, B);
+  tr0_1 = _mm_unpackhi_epi16(C, D);
+  s9s8 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+  sbsa = _mm_srli_si128(s9s8, 8);
+
+  // multiply 2 adjacent elements with the filter and add the result
+  x0 = _mm_maddubs_epi16(s1s0, f[0]);
+  x1 = _mm_maddubs_epi16(s3s2, f[1]);
+  x2 = _mm_maddubs_epi16(s5s4, f[2]);
+  x3 = _mm_maddubs_epi16(s7s6, f[3]);
+  x4 = _mm_maddubs_epi16(s9s8, f[4]);
+  x5 = _mm_maddubs_epi16(sbsa, f[5]);
+  // add and saturate the results together
+  min_x2x3 = _mm_min_epi16(x2, x3);
+  max_x2x3 = _mm_max_epi16(x2, x3);
+  temp = _mm_adds_epi16(x0, x1);
+  temp = _mm_adds_epi16(temp, x5);
+  temp = _mm_adds_epi16(temp, x4);
+
+  temp = _mm_adds_epi16(temp, min_x2x3);
+  temp = _mm_adds_epi16(temp, max_x2x3);
+  _mm_storel_epi64((__m128i *)buf, temp);
+}
+
+// Note:
+//  This function assumes:
+// (1) a 10- or 12-tap filter
+// (2) x_step_q4 == 16, i.e. the subpel filter is fixed for the whole call
+
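+// Dispatch: blocks larger than 16x16 use the 8-row-parallel path above,
+// smaller blocks with w > 2 use the 4-row path, and w <= 2 filters row by
+// row. Cases the SSSE3 path cannot handle (subpel_x_q4 == 0, x_step_q4 !=
+// 16, or missing coefficient tables) fall back to vp10_convolve_horiz_c.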
+void vp10_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
+                               int dst_stride, int w, int h,
+                               const InterpFilterParams filter_params,
+                               const int subpel_x_q4, int x_step_q4, int avg) {
+  DECLARE_ALIGNED(16, uint16_t, temp[8 * 8]);
+  __m128i verf[6];
+  __m128i horf[2];
+  SubpelFilterCoeffs hCoeffs, vCoeffs;
+  const uint8_t *src_ptr;
+  store_pixel_t store2p = store2pixelTab[avg];
+  store_pixel_t store4p = store4pixelTab[avg];
+  transpose_to_dst_t transpose_4x4 = trans4x4Tab[avg];
+  transpose_to_dst_t transpose_8x8 = trans8x8Tab[avg];
+
+  const int tapsNum = filter_params.taps;
+  int block_height, block_residu;
+  int i, col, count;
+  (void)x_step_q4;
+
+  if (0 == subpel_x_q4 || 16 != x_step_q4) {
+    vp10_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
+                          subpel_x_q4, x_step_q4, avg);
+    return;
+  }
+
+  hCoeffs = vp10_get_subpel_filter_signal_dir(filter_params, subpel_x_q4 - 1);
+  vCoeffs =
+      vp10_get_subpel_filter_ver_signal_dir(filter_params, subpel_x_q4 - 1);
+
+  if (!hCoeffs || !vCoeffs) {
+    vp10_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
+                          subpel_x_q4, x_step_q4, avg);
+    return;
+  }
+
+  verf[0] = *((const __m128i *)(vCoeffs));
+  verf[1] = *((const __m128i *)(vCoeffs + 1));
+  verf[2] = *((const __m128i *)(vCoeffs + 2));
+  verf[3] = *((const __m128i *)(vCoeffs + 3));
+  verf[4] = *((const __m128i *)(vCoeffs + 4));
+  verf[5] = *((const __m128i *)(vCoeffs + 5));
+
+  horf[0] = *((const __m128i *)(hCoeffs));
+  horf[1] = *((const __m128i *)(hCoeffs + 1));
+
+  count = 0;
+
+  // Here tapsNum is the filter length; step back to the first tap.
+  src -= (tapsNum >> 1) - 1;
+  src_ptr = src;
+  if (w > WIDTH_BOUND && h > HEIGHT_BOUND) {
+    // 8-pixels parallel
+    block_height = h >> 3;
+    block_residu = h & 7;
+
+    do {
+      for (col = 0; col < w; col += 8) {
+        for (i = 0; i < 8; ++i) {
+          filter_horiz_v8p_ssse3(src_ptr, src_stride, verf, tapsNum,
+                                 temp + (i * 8));
+          src_ptr += 1;
+        }
+        transpose_8x8(temp, 8, dst + col, dst_stride);
+      }
+      count++;
+      src_ptr = src + count * src_stride * 8;
+      dst += dst_stride * 8;
+    } while (count < block_height);
+
+    for (i = 0; i < block_residu; ++i) {
+      filter_horiz_ssse3(src_ptr, horf, tapsNum, w, store4p, dst);
+      src_ptr += src_stride;
+      dst += dst_stride;
+    }
+  } else {
+    if (w > 2) {
+      // 4-pixels parallel
+      block_height = h >> 2;
+      block_residu = h & 3;
+
+      do {
+        for (col = 0; col < w; col += 4) {
+          for (i = 0; i < 4; ++i) {
+            filter_horiz_v4p_ssse3(src_ptr, src_stride, verf, tapsNum,
+                                   temp + (i * 4));
+            src_ptr += 1;
+          }
+          transpose_4x4(temp, 4, dst + col, dst_stride);
+        }
+        count++;
+        src_ptr = src + count * src_stride * 4;
+        dst += dst_stride * 4;
+      } while (count < block_height);
+
+      for (i = 0; i < block_residu; ++i) {
+        filter_horiz_ssse3(src_ptr, horf, tapsNum, w, store4p, dst);
+        src_ptr += src_stride;
+        dst += dst_stride;
+      }
+    } else {
+      for (i = 0; i < h; i++) {
+        filter_horiz_ssse3(src_ptr, horf, tapsNum, w, store2p, dst);
+        src_ptr += src_stride;
+        dst += dst_stride;
+      }
+    }
+  }
+}
+
+// Vertical convolution filtering
+static INLINE void store_8_pixel_only(const __m128i *x, uint8_t *dst) {
+  __m128i u = _mm_packus_epi16(*x, *x);
+  _mm_storel_epi64((__m128i *)dst, u);
+}
+
+static INLINE void accumulate_store_8_pixel(const __m128i *x, uint8_t *dst) {
+  __m128i y = accumulate_store(x, dst);
+  _mm_storel_epi64((__m128i *)dst, y);
+}
+
+static store_pixel_t store8pixelTab[2] = { store_8_pixel_only,
+                                           accumulate_store_8_pixel };
+
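+// Core vertical kernel: loads 12 source rows (zero-padding the first row
+// for a 10-tap filter), interleaves adjacent rows so _mm_maddubs_epi16 can
+// apply the coefficient pairs, then applies the same min/max-guarded
+// saturating sum and (x + 64) >> 7 rounding as the horizontal path.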
+static __m128i filter_vert_ssse3(const uint8_t *src, int src_stride,
+                                 int tapsNum, __m128i *f) {
+  __m128i s[12];
+  const __m128i k_256 = _mm_set1_epi16(1 << 8);
+  const __m128i zero = _mm_setzero_si128();
+  __m128i min_x2x3, max_x2x3, sum;
+  int i = 0;
+  int r = 0;
+
+  if (10 == tapsNum) {
+    i += 1;
+    s[0] = zero;
+  }
+  while (i < 12) {
+    s[i] = _mm_loadu_si128((__m128i const *)(src + r * src_stride));
+    i += 1;
+    r += 1;
+  }
+
+  s[0] = _mm_unpacklo_epi8(s[0], s[1]);
+  s[2] = _mm_unpacklo_epi8(s[2], s[3]);
+  s[4] = _mm_unpacklo_epi8(s[4], s[5]);
+  s[6] = _mm_unpacklo_epi8(s[6], s[7]);
+  s[8] = _mm_unpacklo_epi8(s[8], s[9]);
+  s[10] = _mm_unpacklo_epi8(s[10], s[11]);
+
+  s[0] = _mm_maddubs_epi16(s[0], f[0]);
+  s[2] = _mm_maddubs_epi16(s[2], f[1]);
+  s[4] = _mm_maddubs_epi16(s[4], f[2]);
+  s[6] = _mm_maddubs_epi16(s[6], f[3]);
+  s[8] = _mm_maddubs_epi16(s[8], f[4]);
+  s[10] = _mm_maddubs_epi16(s[10], f[5]);
+
+  min_x2x3 = _mm_min_epi16(s[4], s[6]);
+  max_x2x3 = _mm_max_epi16(s[4], s[6]);
+  sum = _mm_adds_epi16(s[0], s[2]);
+  sum = _mm_adds_epi16(sum, s[10]);
+  sum = _mm_adds_epi16(sum, s[8]);
+
+  sum = _mm_adds_epi16(sum, min_x2x3);
+  sum = _mm_adds_epi16(sum, max_x2x3);
+
+  sum = _mm_mulhrs_epi16(sum, k_256);
+  sum = _mm_packus_epi16(sum, sum);
+  sum = _mm_unpacklo_epi8(sum, zero);
+  return sum;
+}
+
+static void filter_vert_horiz_parallel_ssse3(const uint8_t *src, int src_stride,
+                                             __m128i *f, int tapsNum,
+                                             store_pixel_t store_func,
+                                             uint8_t *dst) {
+  __m128i sum = filter_vert_ssse3(src, src_stride, tapsNum, f);
+  store_func(&sum, dst);
+}
+
+static void filter_vert_compute_small(const uint8_t *src, int src_stride,
+                                      __m128i *f, int tapsNum,
+                                      store_pixel_t store_func, int h,
+                                      uint8_t *dst, int dst_stride) {
+  int rowIndex = 0;
+  do {
+    filter_vert_horiz_parallel_ssse3(src, src_stride, f, tapsNum, store_func,
+                                     dst);
+    rowIndex++;
+    src += src_stride;
+    dst += dst_stride;
+  } while (rowIndex < h);
+}
+
+static void filter_vert_compute_large(const uint8_t *src, int src_stride,
+                                      __m128i *f, int tapsNum,
+                                      store_pixel_t store_func, int w, int h,
+                                      uint8_t *dst, int dst_stride) {
+  int col;
+  int rowIndex = 0;
+  const uint8_t *src_ptr = src;
+  uint8_t *dst_ptr = dst;
+
+  do {
+    for (col = 0; col < w; col += 8) {
+      filter_vert_horiz_parallel_ssse3(src_ptr, src_stride, f, tapsNum,
+                                       store_func, dst_ptr);
+      src_ptr += 8;
+      dst_ptr += 8;
+    }
+    rowIndex++;
+    src_ptr = src + rowIndex * src_stride;
+    dst_ptr = dst + rowIndex * dst_stride;
+  } while (rowIndex < h);
+}
+
+void vp10_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
+                              int dst_stride, int w, int h,
+                              const InterpFilterParams filter_params,
+                              const int subpel_y_q4, int y_step_q4, int avg) {
+  __m128i verf[6];
+  SubpelFilterCoeffs vCoeffs;
+  const uint8_t *src_ptr;
+  uint8_t *dst_ptr = dst;
+  store_pixel_t store2p = store2pixelTab[avg];
+  store_pixel_t store4p = store4pixelTab[avg];
+  store_pixel_t store8p = store8pixelTab[avg];
+  const int tapsNum = filter_params.taps;
+
+  if (0 == subpel_y_q4 || 16 != y_step_q4) {
+    vp10_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
+                         subpel_y_q4, y_step_q4, avg);
+    return;
+  }
+
+  vCoeffs =
+      vp10_get_subpel_filter_ver_signal_dir(filter_params, subpel_y_q4 - 1);
+
+  if (!vCoeffs) {
+    vp10_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
+                         subpel_y_q4, y_step_q4, avg);
+    return;
+  }
+
+  verf[0] = *((const __m128i *)(vCoeffs));
+  verf[1] = *((const __m128i *)(vCoeffs + 1));
+  verf[2] = *((const __m128i *)(vCoeffs + 2));
+  verf[3] = *((const __m128i *)(vCoeffs + 3));
+  verf[4] = *((const __m128i *)(vCoeffs + 4));
+  verf[5] = *((const __m128i *)(vCoeffs + 5));
+
+  src -= src_stride * ((tapsNum >> 1) - 1);
+  src_ptr = src;
+
+  if (w > 4) {
+    filter_vert_compute_large(src_ptr, src_stride, verf, tapsNum, store8p, w, h,
+                              dst_ptr, dst_stride);
+  } else if (4 == w) {
+    filter_vert_compute_small(src_ptr, src_stride, verf, tapsNum, store4p, h,
+                              dst_ptr, dst_stride);
+  } else if (2 == w) {
+    filter_vert_compute_small(src_ptr, src_stride, verf, tapsNum, store2p, h,
+                              dst_ptr, dst_stride);
+  } else {
+    assert(0);
+  }
+}
diff --git a/av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h b/av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h
new file mode 100644
index 0000000..e7d63fe
--- /dev/null
+++ b/av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h
@@ -0,0 +1,3201 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h>  // SSE2
+
+#include "./vp10_rtcd.h"
+#include "av1/common/vp10_fwd_txfm.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+
+// TODO(jingning) The high bit-depth version needs re-work for performance.
+// The current SSE2 implementation also causes cross reference to the static
+// functions in the C implementation file.
+#if DCT_HIGH_BIT_DEPTH
+#define ADD_EPI16 _mm_adds_epi16
+#define SUB_EPI16 _mm_subs_epi16
+#if FDCT32x32_HIGH_PRECISION
+void vp10_fdct32x32_rows_c(const int16_t *intermediate, tran_low_t *out) {
+  int i, j;
+  for (i = 0; i < 32; ++i) {
+    tran_high_t temp_in[32], temp_out[32];
+    for (j = 0; j < 32; ++j) temp_in[j] = intermediate[j * 32 + i];
+    vp10_fdct32(temp_in, temp_out, 0);
+    for (j = 0; j < 32; ++j)
+      out[j + i * 32] =
+          (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
+  }
+}
+#define HIGH_FDCT32x32_2D_C vp10_highbd_fdct32x32_c
+#define HIGH_FDCT32x32_2D_ROWS_C vp10_fdct32x32_rows_c
+#else
+void vp10_fdct32x32_rd_rows_c(const int16_t *intermediate, tran_low_t *out) {
+  int i, j;
+  for (i = 0; i < 32; ++i) {
+    tran_high_t temp_in[32], temp_out[32];
+    for (j = 0; j < 32; ++j) temp_in[j] = intermediate[j * 32 + i];
+    vp10_fdct32(temp_in, temp_out, 1);
+    for (j = 0; j < 32; ++j) out[j + i * 32] = (tran_low_t)temp_out[j];
+  }
+}
+#define HIGH_FDCT32x32_2D_C vp10_highbd_fdct32x32_rd_c
+#define HIGH_FDCT32x32_2D_ROWS_C vp10_fdct32x32_rd_rows_c
+#endif  // FDCT32x32_HIGH_PRECISION
+#else
+#define ADD_EPI16 _mm_add_epi16
+#define SUB_EPI16 _mm_sub_epi16
+#endif  // DCT_HIGH_BIT_DEPTH
+
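+// Note: FDCT32x32_2D is a macro name supplied by the translation unit that
+// includes this header (presumably #defined to the particular
+// vp10_fdct32x32 variant being built), together with DCT_HIGH_BIT_DEPTH and
+// FDCT32x32_HIGH_PRECISION.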
+void FDCT32x32_2D(const int16_t *input, tran_low_t *output_org, int stride) {
+  // Calculate pre-multiplied strides
+  const int str1 = stride;
+  const int str2 = 2 * stride;
+  const int str3 = 2 * stride + str1;
+  // We need an intermediate buffer between passes.
+  DECLARE_ALIGNED(16, int16_t, intermediate[32 * 32]);
+  // Constants
+  //    When we use them, in one case, they are all the same. In all others
+  //    it's a pair of them that we need to repeat four times. This is done
+  //    by constructing the 32 bit constant corresponding to that pair.
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+  const __m128i k__cospi_p16_m16 = pair_set_epi16(+cospi_16_64, -cospi_16_64);
+  const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+  const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+  const __m128i k__cospi_p24_p08 = pair_set_epi16(+cospi_24_64, cospi_8_64);
+  const __m128i k__cospi_p12_p20 = pair_set_epi16(+cospi_12_64, cospi_20_64);
+  const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+  const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+  const __m128i k__cospi_p28_p04 = pair_set_epi16(+cospi_28_64, cospi_4_64);
+  const __m128i k__cospi_m28_m04 = pair_set_epi16(-cospi_28_64, -cospi_4_64);
+  const __m128i k__cospi_m12_m20 = pair_set_epi16(-cospi_12_64, -cospi_20_64);
+  const __m128i k__cospi_p30_p02 = pair_set_epi16(+cospi_30_64, cospi_2_64);
+  const __m128i k__cospi_p14_p18 = pair_set_epi16(+cospi_14_64, cospi_18_64);
+  const __m128i k__cospi_p22_p10 = pair_set_epi16(+cospi_22_64, cospi_10_64);
+  const __m128i k__cospi_p06_p26 = pair_set_epi16(+cospi_6_64, cospi_26_64);
+  const __m128i k__cospi_m26_p06 = pair_set_epi16(-cospi_26_64, cospi_6_64);
+  const __m128i k__cospi_m10_p22 = pair_set_epi16(-cospi_10_64, cospi_22_64);
+  const __m128i k__cospi_m18_p14 = pair_set_epi16(-cospi_18_64, cospi_14_64);
+  const __m128i k__cospi_m02_p30 = pair_set_epi16(-cospi_2_64, cospi_30_64);
+  const __m128i k__cospi_p31_p01 = pair_set_epi16(+cospi_31_64, cospi_1_64);
+  const __m128i k__cospi_p15_p17 = pair_set_epi16(+cospi_15_64, cospi_17_64);
+  const __m128i k__cospi_p23_p09 = pair_set_epi16(+cospi_23_64, cospi_9_64);
+  const __m128i k__cospi_p07_p25 = pair_set_epi16(+cospi_7_64, cospi_25_64);
+  const __m128i k__cospi_m25_p07 = pair_set_epi16(-cospi_25_64, cospi_7_64);
+  const __m128i k__cospi_m09_p23 = pair_set_epi16(-cospi_9_64, cospi_23_64);
+  const __m128i k__cospi_m17_p15 = pair_set_epi16(-cospi_17_64, cospi_15_64);
+  const __m128i k__cospi_m01_p31 = pair_set_epi16(-cospi_1_64, cospi_31_64);
+  const __m128i k__cospi_p27_p05 = pair_set_epi16(+cospi_27_64, cospi_5_64);
+  const __m128i k__cospi_p11_p21 = pair_set_epi16(+cospi_11_64, cospi_21_64);
+  const __m128i k__cospi_p19_p13 = pair_set_epi16(+cospi_19_64, cospi_13_64);
+  const __m128i k__cospi_p03_p29 = pair_set_epi16(+cospi_3_64, cospi_29_64);
+  const __m128i k__cospi_m29_p03 = pair_set_epi16(-cospi_29_64, cospi_3_64);
+  const __m128i k__cospi_m13_p19 = pair_set_epi16(-cospi_13_64, cospi_19_64);
+  const __m128i k__cospi_m21_p11 = pair_set_epi16(-cospi_21_64, cospi_11_64);
+  const __m128i k__cospi_m05_p27 = pair_set_epi16(-cospi_5_64, cospi_27_64);
+  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  const __m128i kZero = _mm_set1_epi16(0);
+  const __m128i kOne = _mm_set1_epi16(1);
+  // Do the two transform/transpose passes
+  int pass;
+#if DCT_HIGH_BIT_DEPTH
+  int overflow;
+#endif
+  for (pass = 0; pass < 2; ++pass) {
+    // We process eight columns (transposed rows in second pass) at a time.
+    int column_start;
+    for (column_start = 0; column_start < 32; column_start += 8) {
+      __m128i step1[32];
+      __m128i step2[32];
+      __m128i step3[32];
+      __m128i out[32];
+      // Stage 1
+      // Note: even though all the loads below are aligned, using the aligned
+      //       intrinsic makes the code slightly slower.
+      if (0 == pass) {
+        const int16_t *in = &input[column_start];
+        // step1[i] =  (in[ 0 * stride] + in[(32 -  1) * stride]) << 2;
+        // Note: the next four blocks could be in a loop. That would help the
+        //       instruction cache but is actually slower.
+        {
+          const int16_t *ina = in + 0 * str1;
+          const int16_t *inb = in + 31 * str1;
+          __m128i *step1a = &step1[0];
+          __m128i *step1b = &step1[31];
+          const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina));
+          const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1));
+          const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2));
+          const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3));
+          const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3));
+          const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2));
+          const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1));
+          const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb));
+          step1a[0] = _mm_add_epi16(ina0, inb0);
+          step1a[1] = _mm_add_epi16(ina1, inb1);
+          step1a[2] = _mm_add_epi16(ina2, inb2);
+          step1a[3] = _mm_add_epi16(ina3, inb3);
+          step1b[-3] = _mm_sub_epi16(ina3, inb3);
+          step1b[-2] = _mm_sub_epi16(ina2, inb2);
+          step1b[-1] = _mm_sub_epi16(ina1, inb1);
+          step1b[-0] = _mm_sub_epi16(ina0, inb0);
+          step1a[0] = _mm_slli_epi16(step1a[0], 2);
+          step1a[1] = _mm_slli_epi16(step1a[1], 2);
+          step1a[2] = _mm_slli_epi16(step1a[2], 2);
+          step1a[3] = _mm_slli_epi16(step1a[3], 2);
+          step1b[-3] = _mm_slli_epi16(step1b[-3], 2);
+          step1b[-2] = _mm_slli_epi16(step1b[-2], 2);
+          step1b[-1] = _mm_slli_epi16(step1b[-1], 2);
+          step1b[-0] = _mm_slli_epi16(step1b[-0], 2);
+        }
+        {
+          const int16_t *ina = in + 4 * str1;
+          const int16_t *inb = in + 27 * str1;
+          __m128i *step1a = &step1[4];
+          __m128i *step1b = &step1[27];
+          const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina));
+          const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1));
+          const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2));
+          const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3));
+          const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3));
+          const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2));
+          const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1));
+          const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb));
+          step1a[0] = _mm_add_epi16(ina0, inb0);
+          step1a[1] = _mm_add_epi16(ina1, inb1);
+          step1a[2] = _mm_add_epi16(ina2, inb2);
+          step1a[3] = _mm_add_epi16(ina3, inb3);
+          step1b[-3] = _mm_sub_epi16(ina3, inb3);
+          step1b[-2] = _mm_sub_epi16(ina2, inb2);
+          step1b[-1] = _mm_sub_epi16(ina1, inb1);
+          step1b[-0] = _mm_sub_epi16(ina0, inb0);
+          step1a[0] = _mm_slli_epi16(step1a[0], 2);
+          step1a[1] = _mm_slli_epi16(step1a[1], 2);
+          step1a[2] = _mm_slli_epi16(step1a[2], 2);
+          step1a[3] = _mm_slli_epi16(step1a[3], 2);
+          step1b[-3] = _mm_slli_epi16(step1b[-3], 2);
+          step1b[-2] = _mm_slli_epi16(step1b[-2], 2);
+          step1b[-1] = _mm_slli_epi16(step1b[-1], 2);
+          step1b[-0] = _mm_slli_epi16(step1b[-0], 2);
+        }
+        {
+          const int16_t *ina = in + 8 * str1;
+          const int16_t *inb = in + 23 * str1;
+          __m128i *step1a = &step1[8];
+          __m128i *step1b = &step1[23];
+          const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina));
+          const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1));
+          const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2));
+          const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3));
+          const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3));
+          const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2));
+          const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1));
+          const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb));
+          step1a[0] = _mm_add_epi16(ina0, inb0);
+          step1a[1] = _mm_add_epi16(ina1, inb1);
+          step1a[2] = _mm_add_epi16(ina2, inb2);
+          step1a[3] = _mm_add_epi16(ina3, inb3);
+          step1b[-3] = _mm_sub_epi16(ina3, inb3);
+          step1b[-2] = _mm_sub_epi16(ina2, inb2);
+          step1b[-1] = _mm_sub_epi16(ina1, inb1);
+          step1b[-0] = _mm_sub_epi16(ina0, inb0);
+          step1a[0] = _mm_slli_epi16(step1a[0], 2);
+          step1a[1] = _mm_slli_epi16(step1a[1], 2);
+          step1a[2] = _mm_slli_epi16(step1a[2], 2);
+          step1a[3] = _mm_slli_epi16(step1a[3], 2);
+          step1b[-3] = _mm_slli_epi16(step1b[-3], 2);
+          step1b[-2] = _mm_slli_epi16(step1b[-2], 2);
+          step1b[-1] = _mm_slli_epi16(step1b[-1], 2);
+          step1b[-0] = _mm_slli_epi16(step1b[-0], 2);
+        }
+        {
+          const int16_t *ina = in + 12 * str1;
+          const int16_t *inb = in + 19 * str1;
+          __m128i *step1a = &step1[12];
+          __m128i *step1b = &step1[19];
+          const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina));
+          const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1));
+          const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2));
+          const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3));
+          const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3));
+          const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2));
+          const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1));
+          const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb));
+          step1a[0] = _mm_add_epi16(ina0, inb0);
+          step1a[1] = _mm_add_epi16(ina1, inb1);
+          step1a[2] = _mm_add_epi16(ina2, inb2);
+          step1a[3] = _mm_add_epi16(ina3, inb3);
+          step1b[-3] = _mm_sub_epi16(ina3, inb3);
+          step1b[-2] = _mm_sub_epi16(ina2, inb2);
+          step1b[-1] = _mm_sub_epi16(ina1, inb1);
+          step1b[-0] = _mm_sub_epi16(ina0, inb0);
+          step1a[0] = _mm_slli_epi16(step1a[0], 2);
+          step1a[1] = _mm_slli_epi16(step1a[1], 2);
+          step1a[2] = _mm_slli_epi16(step1a[2], 2);
+          step1a[3] = _mm_slli_epi16(step1a[3], 2);
+          step1b[-3] = _mm_slli_epi16(step1b[-3], 2);
+          step1b[-2] = _mm_slli_epi16(step1b[-2], 2);
+          step1b[-1] = _mm_slli_epi16(step1b[-1], 2);
+          step1b[-0] = _mm_slli_epi16(step1b[-0], 2);
+        }
+      } else {
+        int16_t *in = &intermediate[column_start];
+        // step1[i] =  in[ 0 * 32] + in[(32 -  1) * 32];
+        // Note: using the same approach as above to have a common offset is
+        //       counter-productive, as all offsets can be calculated at
+        //       compile time.
+        // Note: the next four blocks could be in a loop. That would help the
+        //       instruction cache but is actually slower.
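+        // The intermediate buffer is laid out as a 32x32 block of int16_t,
+        // hence the fixed stride of 32 in the offsets below.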
+        {
+          __m128i in00 = _mm_loadu_si128((const __m128i *)(in + 0 * 32));
+          __m128i in01 = _mm_loadu_si128((const __m128i *)(in + 1 * 32));
+          __m128i in02 = _mm_loadu_si128((const __m128i *)(in + 2 * 32));
+          __m128i in03 = _mm_loadu_si128((const __m128i *)(in + 3 * 32));
+          __m128i in28 = _mm_loadu_si128((const __m128i *)(in + 28 * 32));
+          __m128i in29 = _mm_loadu_si128((const __m128i *)(in + 29 * 32));
+          __m128i in30 = _mm_loadu_si128((const __m128i *)(in + 30 * 32));
+          __m128i in31 = _mm_loadu_si128((const __m128i *)(in + 31 * 32));
+          step1[0] = ADD_EPI16(in00, in31);
+          step1[1] = ADD_EPI16(in01, in30);
+          step1[2] = ADD_EPI16(in02, in29);
+          step1[3] = ADD_EPI16(in03, in28);
+          step1[28] = SUB_EPI16(in03, in28);
+          step1[29] = SUB_EPI16(in02, in29);
+          step1[30] = SUB_EPI16(in01, in30);
+          step1[31] = SUB_EPI16(in00, in31);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x8(&step1[0], &step1[1], &step1[2],
+                                             &step1[3], &step1[28], &step1[29],
+                                             &step1[30], &step1[31]);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          __m128i in04 = _mm_loadu_si128((const __m128i *)(in + 4 * 32));
+          __m128i in05 = _mm_loadu_si128((const __m128i *)(in + 5 * 32));
+          __m128i in06 = _mm_loadu_si128((const __m128i *)(in + 6 * 32));
+          __m128i in07 = _mm_loadu_si128((const __m128i *)(in + 7 * 32));
+          __m128i in24 = _mm_loadu_si128((const __m128i *)(in + 24 * 32));
+          __m128i in25 = _mm_loadu_si128((const __m128i *)(in + 25 * 32));
+          __m128i in26 = _mm_loadu_si128((const __m128i *)(in + 26 * 32));
+          __m128i in27 = _mm_loadu_si128((const __m128i *)(in + 27 * 32));
+          step1[4] = ADD_EPI16(in04, in27);
+          step1[5] = ADD_EPI16(in05, in26);
+          step1[6] = ADD_EPI16(in06, in25);
+          step1[7] = ADD_EPI16(in07, in24);
+          step1[24] = SUB_EPI16(in07, in24);
+          step1[25] = SUB_EPI16(in06, in25);
+          step1[26] = SUB_EPI16(in05, in26);
+          step1[27] = SUB_EPI16(in04, in27);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x8(&step1[4], &step1[5], &step1[6],
+                                             &step1[7], &step1[24], &step1[25],
+                                             &step1[26], &step1[27]);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          __m128i in08 = _mm_loadu_si128((const __m128i *)(in + 8 * 32));
+          __m128i in09 = _mm_loadu_si128((const __m128i *)(in + 9 * 32));
+          __m128i in10 = _mm_loadu_si128((const __m128i *)(in + 10 * 32));
+          __m128i in11 = _mm_loadu_si128((const __m128i *)(in + 11 * 32));
+          __m128i in20 = _mm_loadu_si128((const __m128i *)(in + 20 * 32));
+          __m128i in21 = _mm_loadu_si128((const __m128i *)(in + 21 * 32));
+          __m128i in22 = _mm_loadu_si128((const __m128i *)(in + 22 * 32));
+          __m128i in23 = _mm_loadu_si128((const __m128i *)(in + 23 * 32));
+          step1[8] = ADD_EPI16(in08, in23);
+          step1[9] = ADD_EPI16(in09, in22);
+          step1[10] = ADD_EPI16(in10, in21);
+          step1[11] = ADD_EPI16(in11, in20);
+          step1[20] = SUB_EPI16(in11, in20);
+          step1[21] = SUB_EPI16(in10, in21);
+          step1[22] = SUB_EPI16(in09, in22);
+          step1[23] = SUB_EPI16(in08, in23);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x8(&step1[8], &step1[9], &step1[10],
+                                             &step1[11], &step1[20], &step1[21],
+                                             &step1[22], &step1[23]);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          __m128i in12 = _mm_loadu_si128((const __m128i *)(in + 12 * 32));
+          __m128i in13 = _mm_loadu_si128((const __m128i *)(in + 13 * 32));
+          __m128i in14 = _mm_loadu_si128((const __m128i *)(in + 14 * 32));
+          __m128i in15 = _mm_loadu_si128((const __m128i *)(in + 15 * 32));
+          __m128i in16 = _mm_loadu_si128((const __m128i *)(in + 16 * 32));
+          __m128i in17 = _mm_loadu_si128((const __m128i *)(in + 17 * 32));
+          __m128i in18 = _mm_loadu_si128((const __m128i *)(in + 18 * 32));
+          __m128i in19 = _mm_loadu_si128((const __m128i *)(in + 19 * 32));
+          step1[12] = ADD_EPI16(in12, in19);
+          step1[13] = ADD_EPI16(in13, in18);
+          step1[14] = ADD_EPI16(in14, in17);
+          step1[15] = ADD_EPI16(in15, in16);
+          step1[16] = SUB_EPI16(in15, in16);
+          step1[17] = SUB_EPI16(in14, in17);
+          step1[18] = SUB_EPI16(in13, in18);
+          step1[19] = SUB_EPI16(in12, in19);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x8(&step1[12], &step1[13], &step1[14],
+                                             &step1[15], &step1[16], &step1[17],
+                                             &step1[18], &step1[19]);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+      }
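+      // At this point step1[i] holds in[i] + in[31 - i] for i < 16 and
+      // in[31 - i] - in[i] for i >= 16 (additionally scaled by << 2 in the
+      // first pass).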
+      // Stage 2
+      {
+        step2[0] = ADD_EPI16(step1[0], step1[15]);
+        step2[1] = ADD_EPI16(step1[1], step1[14]);
+        step2[2] = ADD_EPI16(step1[2], step1[13]);
+        step2[3] = ADD_EPI16(step1[3], step1[12]);
+        step2[4] = ADD_EPI16(step1[4], step1[11]);
+        step2[5] = ADD_EPI16(step1[5], step1[10]);
+        step2[6] = ADD_EPI16(step1[6], step1[9]);
+        step2[7] = ADD_EPI16(step1[7], step1[8]);
+        step2[8] = SUB_EPI16(step1[7], step1[8]);
+        step2[9] = SUB_EPI16(step1[6], step1[9]);
+        step2[10] = SUB_EPI16(step1[5], step1[10]);
+        step2[11] = SUB_EPI16(step1[4], step1[11]);
+        step2[12] = SUB_EPI16(step1[3], step1[12]);
+        step2[13] = SUB_EPI16(step1[2], step1[13]);
+        step2[14] = SUB_EPI16(step1[1], step1[14]);
+        step2[15] = SUB_EPI16(step1[0], step1[15]);
+#if DCT_HIGH_BIT_DEPTH
+        overflow = check_epi16_overflow_x16(
+            &step2[0], &step2[1], &step2[2], &step2[3], &step2[4], &step2[5],
+            &step2[6], &step2[7], &step2[8], &step2[9], &step2[10], &step2[11],
+            &step2[12], &step2[13], &step2[14], &step2[15]);
+        if (overflow) {
+          if (pass == 0)
+            HIGH_FDCT32x32_2D_C(input, output_org, stride);
+          else
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+          return;
+        }
+#endif  // DCT_HIGH_BIT_DEPTH
+      }
+      {
+        const __m128i s2_20_0 = _mm_unpacklo_epi16(step1[27], step1[20]);
+        const __m128i s2_20_1 = _mm_unpackhi_epi16(step1[27], step1[20]);
+        const __m128i s2_21_0 = _mm_unpacklo_epi16(step1[26], step1[21]);
+        const __m128i s2_21_1 = _mm_unpackhi_epi16(step1[26], step1[21]);
+        const __m128i s2_22_0 = _mm_unpacklo_epi16(step1[25], step1[22]);
+        const __m128i s2_22_1 = _mm_unpackhi_epi16(step1[25], step1[22]);
+        const __m128i s2_23_0 = _mm_unpacklo_epi16(step1[24], step1[23]);
+        const __m128i s2_23_1 = _mm_unpackhi_epi16(step1[24], step1[23]);
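+        // The unpacklo/unpackhi + madd idiom implements a butterfly rotation:
+        // unpacking interleaves the two inputs so that every 32-bit lane of
+        // _mm_madd_epi16 evaluates a * c0 + b * c1 against the constant pair
+        // from pair_set_epi16(c0, c1). With k__cospi_p16_m16 =
+        // {cospi_16_64, -cospi_16_64} the first eight products below amount
+        // to the scalar (step1[27] - step1[20]) * cospi_16_64 etc., which the
+        // rounding/shift stage further down turns into
+        // dct_const_round_shift((step1[27] - step1[20]) * cospi_16_64).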
+        const __m128i s2_20_2 = _mm_madd_epi16(s2_20_0, k__cospi_p16_m16);
+        const __m128i s2_20_3 = _mm_madd_epi16(s2_20_1, k__cospi_p16_m16);
+        const __m128i s2_21_2 = _mm_madd_epi16(s2_21_0, k__cospi_p16_m16);
+        const __m128i s2_21_3 = _mm_madd_epi16(s2_21_1, k__cospi_p16_m16);
+        const __m128i s2_22_2 = _mm_madd_epi16(s2_22_0, k__cospi_p16_m16);
+        const __m128i s2_22_3 = _mm_madd_epi16(s2_22_1, k__cospi_p16_m16);
+        const __m128i s2_23_2 = _mm_madd_epi16(s2_23_0, k__cospi_p16_m16);
+        const __m128i s2_23_3 = _mm_madd_epi16(s2_23_1, k__cospi_p16_m16);
+        const __m128i s2_24_2 = _mm_madd_epi16(s2_23_0, k__cospi_p16_p16);
+        const __m128i s2_24_3 = _mm_madd_epi16(s2_23_1, k__cospi_p16_p16);
+        const __m128i s2_25_2 = _mm_madd_epi16(s2_22_0, k__cospi_p16_p16);
+        const __m128i s2_25_3 = _mm_madd_epi16(s2_22_1, k__cospi_p16_p16);
+        const __m128i s2_26_2 = _mm_madd_epi16(s2_21_0, k__cospi_p16_p16);
+        const __m128i s2_26_3 = _mm_madd_epi16(s2_21_1, k__cospi_p16_p16);
+        const __m128i s2_27_2 = _mm_madd_epi16(s2_20_0, k__cospi_p16_p16);
+        const __m128i s2_27_3 = _mm_madd_epi16(s2_20_1, k__cospi_p16_p16);
+        // dct_const_round_shift
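+        // i.e. the vector form of
+        //   dct_const_round_shift(x) = (x + DCT_CONST_ROUNDING) >> DCT_CONST_BITS
+        // with k__DCT_CONST_ROUNDING initialized above from DCT_CONST_ROUNDING
+        // (1 << (DCT_CONST_BITS - 1) in libvpx's txfm_common.h).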
+        const __m128i s2_20_4 = _mm_add_epi32(s2_20_2, k__DCT_CONST_ROUNDING);
+        const __m128i s2_20_5 = _mm_add_epi32(s2_20_3, k__DCT_CONST_ROUNDING);
+        const __m128i s2_21_4 = _mm_add_epi32(s2_21_2, k__DCT_CONST_ROUNDING);
+        const __m128i s2_21_5 = _mm_add_epi32(s2_21_3, k__DCT_CONST_ROUNDING);
+        const __m128i s2_22_4 = _mm_add_epi32(s2_22_2, k__DCT_CONST_ROUNDING);
+        const __m128i s2_22_5 = _mm_add_epi32(s2_22_3, k__DCT_CONST_ROUNDING);
+        const __m128i s2_23_4 = _mm_add_epi32(s2_23_2, k__DCT_CONST_ROUNDING);
+        const __m128i s2_23_5 = _mm_add_epi32(s2_23_3, k__DCT_CONST_ROUNDING);
+        const __m128i s2_24_4 = _mm_add_epi32(s2_24_2, k__DCT_CONST_ROUNDING);
+        const __m128i s2_24_5 = _mm_add_epi32(s2_24_3, k__DCT_CONST_ROUNDING);
+        const __m128i s2_25_4 = _mm_add_epi32(s2_25_2, k__DCT_CONST_ROUNDING);
+        const __m128i s2_25_5 = _mm_add_epi32(s2_25_3, k__DCT_CONST_ROUNDING);
+        const __m128i s2_26_4 = _mm_add_epi32(s2_26_2, k__DCT_CONST_ROUNDING);
+        const __m128i s2_26_5 = _mm_add_epi32(s2_26_3, k__DCT_CONST_ROUNDING);
+        const __m128i s2_27_4 = _mm_add_epi32(s2_27_2, k__DCT_CONST_ROUNDING);
+        const __m128i s2_27_5 = _mm_add_epi32(s2_27_3, k__DCT_CONST_ROUNDING);
+        const __m128i s2_20_6 = _mm_srai_epi32(s2_20_4, DCT_CONST_BITS);
+        const __m128i s2_20_7 = _mm_srai_epi32(s2_20_5, DCT_CONST_BITS);
+        const __m128i s2_21_6 = _mm_srai_epi32(s2_21_4, DCT_CONST_BITS);
+        const __m128i s2_21_7 = _mm_srai_epi32(s2_21_5, DCT_CONST_BITS);
+        const __m128i s2_22_6 = _mm_srai_epi32(s2_22_4, DCT_CONST_BITS);
+        const __m128i s2_22_7 = _mm_srai_epi32(s2_22_5, DCT_CONST_BITS);
+        const __m128i s2_23_6 = _mm_srai_epi32(s2_23_4, DCT_CONST_BITS);
+        const __m128i s2_23_7 = _mm_srai_epi32(s2_23_5, DCT_CONST_BITS);
+        const __m128i s2_24_6 = _mm_srai_epi32(s2_24_4, DCT_CONST_BITS);
+        const __m128i s2_24_7 = _mm_srai_epi32(s2_24_5, DCT_CONST_BITS);
+        const __m128i s2_25_6 = _mm_srai_epi32(s2_25_4, DCT_CONST_BITS);
+        const __m128i s2_25_7 = _mm_srai_epi32(s2_25_5, DCT_CONST_BITS);
+        const __m128i s2_26_6 = _mm_srai_epi32(s2_26_4, DCT_CONST_BITS);
+        const __m128i s2_26_7 = _mm_srai_epi32(s2_26_5, DCT_CONST_BITS);
+        const __m128i s2_27_6 = _mm_srai_epi32(s2_27_4, DCT_CONST_BITS);
+        const __m128i s2_27_7 = _mm_srai_epi32(s2_27_5, DCT_CONST_BITS);
+        // Combine
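+        // _mm_packs_epi32 narrows each pair of 32-bit halves back into one
+        // 16-bit vector with signed saturation; the DCT_HIGH_BIT_DEPTH check
+        // below catches any result that saturation would otherwise clip.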
+        step2[20] = _mm_packs_epi32(s2_20_6, s2_20_7);
+        step2[21] = _mm_packs_epi32(s2_21_6, s2_21_7);
+        step2[22] = _mm_packs_epi32(s2_22_6, s2_22_7);
+        step2[23] = _mm_packs_epi32(s2_23_6, s2_23_7);
+        step2[24] = _mm_packs_epi32(s2_24_6, s2_24_7);
+        step2[25] = _mm_packs_epi32(s2_25_6, s2_25_7);
+        step2[26] = _mm_packs_epi32(s2_26_6, s2_26_7);
+        step2[27] = _mm_packs_epi32(s2_27_6, s2_27_7);
+#if DCT_HIGH_BIT_DEPTH
+        overflow = check_epi16_overflow_x8(&step2[20], &step2[21], &step2[22],
+                                           &step2[23], &step2[24], &step2[25],
+                                           &step2[26], &step2[27]);
+        if (overflow) {
+          if (pass == 0)
+            HIGH_FDCT32x32_2D_C(input, output_org, stride);
+          else
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+          return;
+        }
+#endif  // DCT_HIGH_BIT_DEPTH
+      }
+
+#if !FDCT32x32_HIGH_PRECISION
+      // Drop the magnitude by half so the intermediate values stay within
+      // the range of 16 bits.
+      if (1 == pass) {
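+        // The cmplt/SUB_EPI16/add/srai sequence below divides each lane by 4
+        // with round-to-nearest (ties toward zero), i.e.
+        //   x = (x + 1 + (x < 0)) >> 2;
+        // the comparison yields -1 for negative lanes, so subtracting it
+        // supplies the extra +1 that negative values need.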
+        __m128i s3_00_0 = _mm_cmplt_epi16(step2[0], kZero);
+        __m128i s3_01_0 = _mm_cmplt_epi16(step2[1], kZero);
+        __m128i s3_02_0 = _mm_cmplt_epi16(step2[2], kZero);
+        __m128i s3_03_0 = _mm_cmplt_epi16(step2[3], kZero);
+        __m128i s3_04_0 = _mm_cmplt_epi16(step2[4], kZero);
+        __m128i s3_05_0 = _mm_cmplt_epi16(step2[5], kZero);
+        __m128i s3_06_0 = _mm_cmplt_epi16(step2[6], kZero);
+        __m128i s3_07_0 = _mm_cmplt_epi16(step2[7], kZero);
+        __m128i s3_08_0 = _mm_cmplt_epi16(step2[8], kZero);
+        __m128i s3_09_0 = _mm_cmplt_epi16(step2[9], kZero);
+        __m128i s3_10_0 = _mm_cmplt_epi16(step2[10], kZero);
+        __m128i s3_11_0 = _mm_cmplt_epi16(step2[11], kZero);
+        __m128i s3_12_0 = _mm_cmplt_epi16(step2[12], kZero);
+        __m128i s3_13_0 = _mm_cmplt_epi16(step2[13], kZero);
+        __m128i s3_14_0 = _mm_cmplt_epi16(step2[14], kZero);
+        __m128i s3_15_0 = _mm_cmplt_epi16(step2[15], kZero);
+        __m128i s3_16_0 = _mm_cmplt_epi16(step1[16], kZero);
+        __m128i s3_17_0 = _mm_cmplt_epi16(step1[17], kZero);
+        __m128i s3_18_0 = _mm_cmplt_epi16(step1[18], kZero);
+        __m128i s3_19_0 = _mm_cmplt_epi16(step1[19], kZero);
+        __m128i s3_20_0 = _mm_cmplt_epi16(step2[20], kZero);
+        __m128i s3_21_0 = _mm_cmplt_epi16(step2[21], kZero);
+        __m128i s3_22_0 = _mm_cmplt_epi16(step2[22], kZero);
+        __m128i s3_23_0 = _mm_cmplt_epi16(step2[23], kZero);
+        __m128i s3_24_0 = _mm_cmplt_epi16(step2[24], kZero);
+        __m128i s3_25_0 = _mm_cmplt_epi16(step2[25], kZero);
+        __m128i s3_26_0 = _mm_cmplt_epi16(step2[26], kZero);
+        __m128i s3_27_0 = _mm_cmplt_epi16(step2[27], kZero);
+        __m128i s3_28_0 = _mm_cmplt_epi16(step1[28], kZero);
+        __m128i s3_29_0 = _mm_cmplt_epi16(step1[29], kZero);
+        __m128i s3_30_0 = _mm_cmplt_epi16(step1[30], kZero);
+        __m128i s3_31_0 = _mm_cmplt_epi16(step1[31], kZero);
+
+        step2[0] = SUB_EPI16(step2[0], s3_00_0);
+        step2[1] = SUB_EPI16(step2[1], s3_01_0);
+        step2[2] = SUB_EPI16(step2[2], s3_02_0);
+        step2[3] = SUB_EPI16(step2[3], s3_03_0);
+        step2[4] = SUB_EPI16(step2[4], s3_04_0);
+        step2[5] = SUB_EPI16(step2[5], s3_05_0);
+        step2[6] = SUB_EPI16(step2[6], s3_06_0);
+        step2[7] = SUB_EPI16(step2[7], s3_07_0);
+        step2[8] = SUB_EPI16(step2[8], s3_08_0);
+        step2[9] = SUB_EPI16(step2[9], s3_09_0);
+        step2[10] = SUB_EPI16(step2[10], s3_10_0);
+        step2[11] = SUB_EPI16(step2[11], s3_11_0);
+        step2[12] = SUB_EPI16(step2[12], s3_12_0);
+        step2[13] = SUB_EPI16(step2[13], s3_13_0);
+        step2[14] = SUB_EPI16(step2[14], s3_14_0);
+        step2[15] = SUB_EPI16(step2[15], s3_15_0);
+        step1[16] = SUB_EPI16(step1[16], s3_16_0);
+        step1[17] = SUB_EPI16(step1[17], s3_17_0);
+        step1[18] = SUB_EPI16(step1[18], s3_18_0);
+        step1[19] = SUB_EPI16(step1[19], s3_19_0);
+        step2[20] = SUB_EPI16(step2[20], s3_20_0);
+        step2[21] = SUB_EPI16(step2[21], s3_21_0);
+        step2[22] = SUB_EPI16(step2[22], s3_22_0);
+        step2[23] = SUB_EPI16(step2[23], s3_23_0);
+        step2[24] = SUB_EPI16(step2[24], s3_24_0);
+        step2[25] = SUB_EPI16(step2[25], s3_25_0);
+        step2[26] = SUB_EPI16(step2[26], s3_26_0);
+        step2[27] = SUB_EPI16(step2[27], s3_27_0);
+        step1[28] = SUB_EPI16(step1[28], s3_28_0);
+        step1[29] = SUB_EPI16(step1[29], s3_29_0);
+        step1[30] = SUB_EPI16(step1[30], s3_30_0);
+        step1[31] = SUB_EPI16(step1[31], s3_31_0);
+#if DCT_HIGH_BIT_DEPTH
+        overflow = check_epi16_overflow_x32(
+            &step2[0], &step2[1], &step2[2], &step2[3], &step2[4], &step2[5],
+            &step2[6], &step2[7], &step2[8], &step2[9], &step2[10], &step2[11],
+            &step2[12], &step2[13], &step2[14], &step2[15], &step1[16],
+            &step1[17], &step1[18], &step1[19], &step2[20], &step2[21],
+            &step2[22], &step2[23], &step2[24], &step2[25], &step2[26],
+            &step2[27], &step1[28], &step1[29], &step1[30], &step1[31]);
+        if (overflow) {
+          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+          return;
+        }
+#endif  // DCT_HIGH_BIT_DEPTH
+        step2[0] = _mm_add_epi16(step2[0], kOne);
+        step2[1] = _mm_add_epi16(step2[1], kOne);
+        step2[2] = _mm_add_epi16(step2[2], kOne);
+        step2[3] = _mm_add_epi16(step2[3], kOne);
+        step2[4] = _mm_add_epi16(step2[4], kOne);
+        step2[5] = _mm_add_epi16(step2[5], kOne);
+        step2[6] = _mm_add_epi16(step2[6], kOne);
+        step2[7] = _mm_add_epi16(step2[7], kOne);
+        step2[8] = _mm_add_epi16(step2[8], kOne);
+        step2[9] = _mm_add_epi16(step2[9], kOne);
+        step2[10] = _mm_add_epi16(step2[10], kOne);
+        step2[11] = _mm_add_epi16(step2[11], kOne);
+        step2[12] = _mm_add_epi16(step2[12], kOne);
+        step2[13] = _mm_add_epi16(step2[13], kOne);
+        step2[14] = _mm_add_epi16(step2[14], kOne);
+        step2[15] = _mm_add_epi16(step2[15], kOne);
+        step1[16] = _mm_add_epi16(step1[16], kOne);
+        step1[17] = _mm_add_epi16(step1[17], kOne);
+        step1[18] = _mm_add_epi16(step1[18], kOne);
+        step1[19] = _mm_add_epi16(step1[19], kOne);
+        step2[20] = _mm_add_epi16(step2[20], kOne);
+        step2[21] = _mm_add_epi16(step2[21], kOne);
+        step2[22] = _mm_add_epi16(step2[22], kOne);
+        step2[23] = _mm_add_epi16(step2[23], kOne);
+        step2[24] = _mm_add_epi16(step2[24], kOne);
+        step2[25] = _mm_add_epi16(step2[25], kOne);
+        step2[26] = _mm_add_epi16(step2[26], kOne);
+        step2[27] = _mm_add_epi16(step2[27], kOne);
+        step1[28] = _mm_add_epi16(step1[28], kOne);
+        step1[29] = _mm_add_epi16(step1[29], kOne);
+        step1[30] = _mm_add_epi16(step1[30], kOne);
+        step1[31] = _mm_add_epi16(step1[31], kOne);
+
+        step2[0] = _mm_srai_epi16(step2[0], 2);
+        step2[1] = _mm_srai_epi16(step2[1], 2);
+        step2[2] = _mm_srai_epi16(step2[2], 2);
+        step2[3] = _mm_srai_epi16(step2[3], 2);
+        step2[4] = _mm_srai_epi16(step2[4], 2);
+        step2[5] = _mm_srai_epi16(step2[5], 2);
+        step2[6] = _mm_srai_epi16(step2[6], 2);
+        step2[7] = _mm_srai_epi16(step2[7], 2);
+        step2[8] = _mm_srai_epi16(step2[8], 2);
+        step2[9] = _mm_srai_epi16(step2[9], 2);
+        step2[10] = _mm_srai_epi16(step2[10], 2);
+        step2[11] = _mm_srai_epi16(step2[11], 2);
+        step2[12] = _mm_srai_epi16(step2[12], 2);
+        step2[13] = _mm_srai_epi16(step2[13], 2);
+        step2[14] = _mm_srai_epi16(step2[14], 2);
+        step2[15] = _mm_srai_epi16(step2[15], 2);
+        step1[16] = _mm_srai_epi16(step1[16], 2);
+        step1[17] = _mm_srai_epi16(step1[17], 2);
+        step1[18] = _mm_srai_epi16(step1[18], 2);
+        step1[19] = _mm_srai_epi16(step1[19], 2);
+        step2[20] = _mm_srai_epi16(step2[20], 2);
+        step2[21] = _mm_srai_epi16(step2[21], 2);
+        step2[22] = _mm_srai_epi16(step2[22], 2);
+        step2[23] = _mm_srai_epi16(step2[23], 2);
+        step2[24] = _mm_srai_epi16(step2[24], 2);
+        step2[25] = _mm_srai_epi16(step2[25], 2);
+        step2[26] = _mm_srai_epi16(step2[26], 2);
+        step2[27] = _mm_srai_epi16(step2[27], 2);
+        step1[28] = _mm_srai_epi16(step1[28], 2);
+        step1[29] = _mm_srai_epi16(step1[29], 2);
+        step1[30] = _mm_srai_epi16(step1[30], 2);
+        step1[31] = _mm_srai_epi16(step1[31], 2);
+      }
+#endif  // !FDCT32x32_HIGH_PRECISION
+
+#if FDCT32x32_HIGH_PRECISION
+      if (pass == 0) {
+#endif
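+        // In the FDCT32x32_HIGH_PRECISION configuration only the first pass
+        // takes this 16-bit path; the second pass is presumably handled by a
+        // matching else branch that keeps intermediates at 32-bit precision.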
+        // Stage 3
+        {
+          step3[0] = ADD_EPI16(step2[(8 - 1)], step2[0]);
+          step3[1] = ADD_EPI16(step2[(8 - 2)], step2[1]);
+          step3[2] = ADD_EPI16(step2[(8 - 3)], step2[2]);
+          step3[3] = ADD_EPI16(step2[(8 - 4)], step2[3]);
+          step3[4] = SUB_EPI16(step2[(8 - 5)], step2[4]);
+          step3[5] = SUB_EPI16(step2[(8 - 6)], step2[5]);
+          step3[6] = SUB_EPI16(step2[(8 - 7)], step2[6]);
+          step3[7] = SUB_EPI16(step2[(8 - 8)], step2[7]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x8(&step3[0], &step3[1], &step3[2],
+                                             &step3[3], &step3[4], &step3[5],
+                                             &step3[6], &step3[7]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          const __m128i s3_10_0 = _mm_unpacklo_epi16(step2[13], step2[10]);
+          const __m128i s3_10_1 = _mm_unpackhi_epi16(step2[13], step2[10]);
+          const __m128i s3_11_0 = _mm_unpacklo_epi16(step2[12], step2[11]);
+          const __m128i s3_11_1 = _mm_unpackhi_epi16(step2[12], step2[11]);
+          const __m128i s3_10_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_m16);
+          const __m128i s3_10_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_m16);
+          const __m128i s3_11_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_m16);
+          const __m128i s3_11_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_m16);
+          const __m128i s3_12_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_p16);
+          const __m128i s3_12_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_p16);
+          const __m128i s3_13_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_p16);
+          const __m128i s3_13_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_p16);
+          // dct_const_round_shift
+          const __m128i s3_10_4 = _mm_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_10_5 = _mm_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_11_4 = _mm_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_11_5 = _mm_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_12_4 = _mm_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_12_5 = _mm_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_13_4 = _mm_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_13_5 = _mm_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_10_6 = _mm_srai_epi32(s3_10_4, DCT_CONST_BITS);
+          const __m128i s3_10_7 = _mm_srai_epi32(s3_10_5, DCT_CONST_BITS);
+          const __m128i s3_11_6 = _mm_srai_epi32(s3_11_4, DCT_CONST_BITS);
+          const __m128i s3_11_7 = _mm_srai_epi32(s3_11_5, DCT_CONST_BITS);
+          const __m128i s3_12_6 = _mm_srai_epi32(s3_12_4, DCT_CONST_BITS);
+          const __m128i s3_12_7 = _mm_srai_epi32(s3_12_5, DCT_CONST_BITS);
+          const __m128i s3_13_6 = _mm_srai_epi32(s3_13_4, DCT_CONST_BITS);
+          const __m128i s3_13_7 = _mm_srai_epi32(s3_13_5, DCT_CONST_BITS);
+          // Combine
+          step3[10] = _mm_packs_epi32(s3_10_6, s3_10_7);
+          step3[11] = _mm_packs_epi32(s3_11_6, s3_11_7);
+          step3[12] = _mm_packs_epi32(s3_12_6, s3_12_7);
+          step3[13] = _mm_packs_epi32(s3_13_6, s3_13_7);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x4(&step3[10], &step3[11], &step3[12],
+                                             &step3[13]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          step3[16] = ADD_EPI16(step2[23], step1[16]);
+          step3[17] = ADD_EPI16(step2[22], step1[17]);
+          step3[18] = ADD_EPI16(step2[21], step1[18]);
+          step3[19] = ADD_EPI16(step2[20], step1[19]);
+          step3[20] = SUB_EPI16(step1[19], step2[20]);
+          step3[21] = SUB_EPI16(step1[18], step2[21]);
+          step3[22] = SUB_EPI16(step1[17], step2[22]);
+          step3[23] = SUB_EPI16(step1[16], step2[23]);
+          step3[24] = SUB_EPI16(step1[31], step2[24]);
+          step3[25] = SUB_EPI16(step1[30], step2[25]);
+          step3[26] = SUB_EPI16(step1[29], step2[26]);
+          step3[27] = SUB_EPI16(step1[28], step2[27]);
+          step3[28] = ADD_EPI16(step2[27], step1[28]);
+          step3[29] = ADD_EPI16(step2[26], step1[29]);
+          step3[30] = ADD_EPI16(step2[25], step1[30]);
+          step3[31] = ADD_EPI16(step2[24], step1[31]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x16(
+              &step3[16], &step3[17], &step3[18], &step3[19], &step3[20],
+              &step3[21], &step3[22], &step3[23], &step3[24], &step3[25],
+              &step3[26], &step3[27], &step3[28], &step3[29], &step3[30],
+              &step3[31]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+
+        // Stage 4
+        {
+          step1[0] = ADD_EPI16(step3[3], step3[0]);
+          step1[1] = ADD_EPI16(step3[2], step3[1]);
+          step1[2] = SUB_EPI16(step3[1], step3[2]);
+          step1[3] = SUB_EPI16(step3[0], step3[3]);
+          step1[8] = ADD_EPI16(step3[11], step2[8]);
+          step1[9] = ADD_EPI16(step3[10], step2[9]);
+          step1[10] = SUB_EPI16(step2[9], step3[10]);
+          step1[11] = SUB_EPI16(step2[8], step3[11]);
+          step1[12] = SUB_EPI16(step2[15], step3[12]);
+          step1[13] = SUB_EPI16(step2[14], step3[13]);
+          step1[14] = ADD_EPI16(step3[13], step2[14]);
+          step1[15] = ADD_EPI16(step3[12], step2[15]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x16(
+              &step1[0], &step1[1], &step1[2], &step1[3], &step1[4], &step1[5],
+              &step1[6], &step1[7], &step1[8], &step1[9], &step1[10],
+              &step1[11], &step1[12], &step1[13], &step1[14], &step1[15]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          const __m128i s1_05_0 = _mm_unpacklo_epi16(step3[6], step3[5]);
+          const __m128i s1_05_1 = _mm_unpackhi_epi16(step3[6], step3[5]);
+          const __m128i s1_05_2 = _mm_madd_epi16(s1_05_0, k__cospi_p16_m16);
+          const __m128i s1_05_3 = _mm_madd_epi16(s1_05_1, k__cospi_p16_m16);
+          const __m128i s1_06_2 = _mm_madd_epi16(s1_05_0, k__cospi_p16_p16);
+          const __m128i s1_06_3 = _mm_madd_epi16(s1_05_1, k__cospi_p16_p16);
+          // dct_const_round_shift
+          const __m128i s1_05_4 = _mm_add_epi32(s1_05_2, k__DCT_CONST_ROUNDING);
+          const __m128i s1_05_5 = _mm_add_epi32(s1_05_3, k__DCT_CONST_ROUNDING);
+          const __m128i s1_06_4 = _mm_add_epi32(s1_06_2, k__DCT_CONST_ROUNDING);
+          const __m128i s1_06_5 = _mm_add_epi32(s1_06_3, k__DCT_CONST_ROUNDING);
+          const __m128i s1_05_6 = _mm_srai_epi32(s1_05_4, DCT_CONST_BITS);
+          const __m128i s1_05_7 = _mm_srai_epi32(s1_05_5, DCT_CONST_BITS);
+          const __m128i s1_06_6 = _mm_srai_epi32(s1_06_4, DCT_CONST_BITS);
+          const __m128i s1_06_7 = _mm_srai_epi32(s1_06_5, DCT_CONST_BITS);
+          // Combine
+          step1[5] = _mm_packs_epi32(s1_05_6, s1_05_7);
+          step1[6] = _mm_packs_epi32(s1_06_6, s1_06_7);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x2(&step1[5], &step1[6]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          const __m128i s1_18_0 = _mm_unpacklo_epi16(step3[18], step3[29]);
+          const __m128i s1_18_1 = _mm_unpackhi_epi16(step3[18], step3[29]);
+          const __m128i s1_19_0 = _mm_unpacklo_epi16(step3[19], step3[28]);
+          const __m128i s1_19_1 = _mm_unpackhi_epi16(step3[19], step3[28]);
+          const __m128i s1_20_0 = _mm_unpacklo_epi16(step3[20], step3[27]);
+          const __m128i s1_20_1 = _mm_unpackhi_epi16(step3[20], step3[27]);
+          const __m128i s1_21_0 = _mm_unpacklo_epi16(step3[21], step3[26]);
+          const __m128i s1_21_1 = _mm_unpackhi_epi16(step3[21], step3[26]);
+          const __m128i s1_18_2 = _mm_madd_epi16(s1_18_0, k__cospi_m08_p24);
+          const __m128i s1_18_3 = _mm_madd_epi16(s1_18_1, k__cospi_m08_p24);
+          const __m128i s1_19_2 = _mm_madd_epi16(s1_19_0, k__cospi_m08_p24);
+          const __m128i s1_19_3 = _mm_madd_epi16(s1_19_1, k__cospi_m08_p24);
+          const __m128i s1_20_2 = _mm_madd_epi16(s1_20_0, k__cospi_m24_m08);
+          const __m128i s1_20_3 = _mm_madd_epi16(s1_20_1, k__cospi_m24_m08);
+          const __m128i s1_21_2 = _mm_madd_epi16(s1_21_0, k__cospi_m24_m08);
+          const __m128i s1_21_3 = _mm_madd_epi16(s1_21_1, k__cospi_m24_m08);
+          const __m128i s1_26_2 = _mm_madd_epi16(s1_21_0, k__cospi_m08_p24);
+          const __m128i s1_26_3 = _mm_madd_epi16(s1_21_1, k__cospi_m08_p24);
+          const __m128i s1_27_2 = _mm_madd_epi16(s1_20_0, k__cospi_m08_p24);
+          const __m128i s1_27_3 = _mm_madd_epi16(s1_20_1, k__cospi_m08_p24);
+          const __m128i s1_28_2 = _mm_madd_epi16(s1_19_0, k__cospi_p24_p08);
+          const __m128i s1_28_3 = _mm_madd_epi16(s1_19_1, k__cospi_p24_p08);
+          const __m128i s1_29_2 = _mm_madd_epi16(s1_18_0, k__cospi_p24_p08);
+          const __m128i s1_29_3 = _mm_madd_epi16(s1_18_1, k__cospi_p24_p08);
+          // dct_const_round_shift
+          const __m128i s1_18_4 = _mm_add_epi32(s1_18_2, k__DCT_CONST_ROUNDING);
+          const __m128i s1_18_5 = _mm_add_epi32(s1_18_3, k__DCT_CONST_ROUNDING);
+          const __m128i s1_19_4 = _mm_add_epi32(s1_19_2, k__DCT_CONST_ROUNDING);
+          const __m128i s1_19_5 = _mm_add_epi32(s1_19_3, k__DCT_CONST_ROUNDING);
+          const __m128i s1_20_4 = _mm_add_epi32(s1_20_2, k__DCT_CONST_ROUNDING);
+          const __m128i s1_20_5 = _mm_add_epi32(s1_20_3, k__DCT_CONST_ROUNDING);
+          const __m128i s1_21_4 = _mm_add_epi32(s1_21_2, k__DCT_CONST_ROUNDING);
+          const __m128i s1_21_5 = _mm_add_epi32(s1_21_3, k__DCT_CONST_ROUNDING);
+          const __m128i s1_26_4 = _mm_add_epi32(s1_26_2, k__DCT_CONST_ROUNDING);
+          const __m128i s1_26_5 = _mm_add_epi32(s1_26_3, k__DCT_CONST_ROUNDING);
+          const __m128i s1_27_4 = _mm_add_epi32(s1_27_2, k__DCT_CONST_ROUNDING);
+          const __m128i s1_27_5 = _mm_add_epi32(s1_27_3, k__DCT_CONST_ROUNDING);
+          const __m128i s1_28_4 = _mm_add_epi32(s1_28_2, k__DCT_CONST_ROUNDING);
+          const __m128i s1_28_5 = _mm_add_epi32(s1_28_3, k__DCT_CONST_ROUNDING);
+          const __m128i s1_29_4 = _mm_add_epi32(s1_29_2, k__DCT_CONST_ROUNDING);
+          const __m128i s1_29_5 = _mm_add_epi32(s1_29_3, k__DCT_CONST_ROUNDING);
+          const __m128i s1_18_6 = _mm_srai_epi32(s1_18_4, DCT_CONST_BITS);
+          const __m128i s1_18_7 = _mm_srai_epi32(s1_18_5, DCT_CONST_BITS);
+          const __m128i s1_19_6 = _mm_srai_epi32(s1_19_4, DCT_CONST_BITS);
+          const __m128i s1_19_7 = _mm_srai_epi32(s1_19_5, DCT_CONST_BITS);
+          const __m128i s1_20_6 = _mm_srai_epi32(s1_20_4, DCT_CONST_BITS);
+          const __m128i s1_20_7 = _mm_srai_epi32(s1_20_5, DCT_CONST_BITS);
+          const __m128i s1_21_6 = _mm_srai_epi32(s1_21_4, DCT_CONST_BITS);
+          const __m128i s1_21_7 = _mm_srai_epi32(s1_21_5, DCT_CONST_BITS);
+          const __m128i s1_26_6 = _mm_srai_epi32(s1_26_4, DCT_CONST_BITS);
+          const __m128i s1_26_7 = _mm_srai_epi32(s1_26_5, DCT_CONST_BITS);
+          const __m128i s1_27_6 = _mm_srai_epi32(s1_27_4, DCT_CONST_BITS);
+          const __m128i s1_27_7 = _mm_srai_epi32(s1_27_5, DCT_CONST_BITS);
+          const __m128i s1_28_6 = _mm_srai_epi32(s1_28_4, DCT_CONST_BITS);
+          const __m128i s1_28_7 = _mm_srai_epi32(s1_28_5, DCT_CONST_BITS);
+          const __m128i s1_29_6 = _mm_srai_epi32(s1_29_4, DCT_CONST_BITS);
+          const __m128i s1_29_7 = _mm_srai_epi32(s1_29_5, DCT_CONST_BITS);
+          // Combine
+          step1[18] = _mm_packs_epi32(s1_18_6, s1_18_7);
+          step1[19] = _mm_packs_epi32(s1_19_6, s1_19_7);
+          step1[20] = _mm_packs_epi32(s1_20_6, s1_20_7);
+          step1[21] = _mm_packs_epi32(s1_21_6, s1_21_7);
+          step1[26] = _mm_packs_epi32(s1_26_6, s1_26_7);
+          step1[27] = _mm_packs_epi32(s1_27_6, s1_27_7);
+          step1[28] = _mm_packs_epi32(s1_28_6, s1_28_7);
+          step1[29] = _mm_packs_epi32(s1_29_6, s1_29_7);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x8(&step1[18], &step1[19], &step1[20],
+                                             &step1[21], &step1[26], &step1[27],
+                                             &step1[28], &step1[29]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        // Stage 5
+        {
+          step2[4] = ADD_EPI16(step1[5], step3[4]);
+          step2[5] = SUB_EPI16(step3[4], step1[5]);
+          step2[6] = SUB_EPI16(step3[7], step1[6]);
+          step2[7] = ADD_EPI16(step1[6], step3[7]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x4(&step2[4], &step2[5], &step2[6],
+                                             &step2[7]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          const __m128i out_00_0 = _mm_unpacklo_epi16(step1[0], step1[1]);
+          const __m128i out_00_1 = _mm_unpackhi_epi16(step1[0], step1[1]);
+          const __m128i out_08_0 = _mm_unpacklo_epi16(step1[2], step1[3]);
+          const __m128i out_08_1 = _mm_unpackhi_epi16(step1[2], step1[3]);
+          const __m128i out_00_2 = _mm_madd_epi16(out_00_0, k__cospi_p16_p16);
+          const __m128i out_00_3 = _mm_madd_epi16(out_00_1, k__cospi_p16_p16);
+          const __m128i out_16_2 = _mm_madd_epi16(out_00_0, k__cospi_p16_m16);
+          const __m128i out_16_3 = _mm_madd_epi16(out_00_1, k__cospi_p16_m16);
+          const __m128i out_08_2 = _mm_madd_epi16(out_08_0, k__cospi_p24_p08);
+          const __m128i out_08_3 = _mm_madd_epi16(out_08_1, k__cospi_p24_p08);
+          const __m128i out_24_2 = _mm_madd_epi16(out_08_0, k__cospi_m08_p24);
+          const __m128i out_24_3 = _mm_madd_epi16(out_08_1, k__cospi_m08_p24);
+          // dct_const_round_shift
+          const __m128i out_00_4 =
+              _mm_add_epi32(out_00_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_00_5 =
+              _mm_add_epi32(out_00_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_16_4 =
+              _mm_add_epi32(out_16_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_16_5 =
+              _mm_add_epi32(out_16_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_08_4 =
+              _mm_add_epi32(out_08_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_08_5 =
+              _mm_add_epi32(out_08_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_24_4 =
+              _mm_add_epi32(out_24_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_24_5 =
+              _mm_add_epi32(out_24_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_00_6 = _mm_srai_epi32(out_00_4, DCT_CONST_BITS);
+          const __m128i out_00_7 = _mm_srai_epi32(out_00_5, DCT_CONST_BITS);
+          const __m128i out_16_6 = _mm_srai_epi32(out_16_4, DCT_CONST_BITS);
+          const __m128i out_16_7 = _mm_srai_epi32(out_16_5, DCT_CONST_BITS);
+          const __m128i out_08_6 = _mm_srai_epi32(out_08_4, DCT_CONST_BITS);
+          const __m128i out_08_7 = _mm_srai_epi32(out_08_5, DCT_CONST_BITS);
+          const __m128i out_24_6 = _mm_srai_epi32(out_24_4, DCT_CONST_BITS);
+          const __m128i out_24_7 = _mm_srai_epi32(out_24_5, DCT_CONST_BITS);
+          // Combine
+          out[0] = _mm_packs_epi32(out_00_6, out_00_7);
+          out[16] = _mm_packs_epi32(out_16_6, out_16_7);
+          out[8] = _mm_packs_epi32(out_08_6, out_08_7);
+          out[24] = _mm_packs_epi32(out_24_6, out_24_7);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x4(&out[0], &out[16], &out[8], &out[24]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          const __m128i s2_09_0 = _mm_unpacklo_epi16(step1[9], step1[14]);
+          const __m128i s2_09_1 = _mm_unpackhi_epi16(step1[9], step1[14]);
+          const __m128i s2_10_0 = _mm_unpacklo_epi16(step1[10], step1[13]);
+          const __m128i s2_10_1 = _mm_unpackhi_epi16(step1[10], step1[13]);
+          const __m128i s2_09_2 = _mm_madd_epi16(s2_09_0, k__cospi_m08_p24);
+          const __m128i s2_09_3 = _mm_madd_epi16(s2_09_1, k__cospi_m08_p24);
+          const __m128i s2_10_2 = _mm_madd_epi16(s2_10_0, k__cospi_m24_m08);
+          const __m128i s2_10_3 = _mm_madd_epi16(s2_10_1, k__cospi_m24_m08);
+          const __m128i s2_13_2 = _mm_madd_epi16(s2_10_0, k__cospi_m08_p24);
+          const __m128i s2_13_3 = _mm_madd_epi16(s2_10_1, k__cospi_m08_p24);
+          const __m128i s2_14_2 = _mm_madd_epi16(s2_09_0, k__cospi_p24_p08);
+          const __m128i s2_14_3 = _mm_madd_epi16(s2_09_1, k__cospi_p24_p08);
+          // dct_const_round_shift
+          const __m128i s2_09_4 = _mm_add_epi32(s2_09_2, k__DCT_CONST_ROUNDING);
+          const __m128i s2_09_5 = _mm_add_epi32(s2_09_3, k__DCT_CONST_ROUNDING);
+          const __m128i s2_10_4 = _mm_add_epi32(s2_10_2, k__DCT_CONST_ROUNDING);
+          const __m128i s2_10_5 = _mm_add_epi32(s2_10_3, k__DCT_CONST_ROUNDING);
+          const __m128i s2_13_4 = _mm_add_epi32(s2_13_2, k__DCT_CONST_ROUNDING);
+          const __m128i s2_13_5 = _mm_add_epi32(s2_13_3, k__DCT_CONST_ROUNDING);
+          const __m128i s2_14_4 = _mm_add_epi32(s2_14_2, k__DCT_CONST_ROUNDING);
+          const __m128i s2_14_5 = _mm_add_epi32(s2_14_3, k__DCT_CONST_ROUNDING);
+          const __m128i s2_09_6 = _mm_srai_epi32(s2_09_4, DCT_CONST_BITS);
+          const __m128i s2_09_7 = _mm_srai_epi32(s2_09_5, DCT_CONST_BITS);
+          const __m128i s2_10_6 = _mm_srai_epi32(s2_10_4, DCT_CONST_BITS);
+          const __m128i s2_10_7 = _mm_srai_epi32(s2_10_5, DCT_CONST_BITS);
+          const __m128i s2_13_6 = _mm_srai_epi32(s2_13_4, DCT_CONST_BITS);
+          const __m128i s2_13_7 = _mm_srai_epi32(s2_13_5, DCT_CONST_BITS);
+          const __m128i s2_14_6 = _mm_srai_epi32(s2_14_4, DCT_CONST_BITS);
+          const __m128i s2_14_7 = _mm_srai_epi32(s2_14_5, DCT_CONST_BITS);
+          // Combine
+          step2[9] = _mm_packs_epi32(s2_09_6, s2_09_7);
+          step2[10] = _mm_packs_epi32(s2_10_6, s2_10_7);
+          step2[13] = _mm_packs_epi32(s2_13_6, s2_13_7);
+          step2[14] = _mm_packs_epi32(s2_14_6, s2_14_7);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x4(&step2[9], &step2[10], &step2[13],
+                                             &step2[14]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          step2[16] = ADD_EPI16(step1[19], step3[16]);
+          step2[17] = ADD_EPI16(step1[18], step3[17]);
+          step2[18] = SUB_EPI16(step3[17], step1[18]);
+          step2[19] = SUB_EPI16(step3[16], step1[19]);
+          step2[20] = SUB_EPI16(step3[23], step1[20]);
+          step2[21] = SUB_EPI16(step3[22], step1[21]);
+          step2[22] = ADD_EPI16(step1[21], step3[22]);
+          step2[23] = ADD_EPI16(step1[20], step3[23]);
+          step2[24] = ADD_EPI16(step1[27], step3[24]);
+          step2[25] = ADD_EPI16(step1[26], step3[25]);
+          step2[26] = SUB_EPI16(step3[25], step1[26]);
+          step2[27] = SUB_EPI16(step3[24], step1[27]);
+          step2[28] = SUB_EPI16(step3[31], step1[28]);
+          step2[29] = SUB_EPI16(step3[30], step1[29]);
+          step2[30] = ADD_EPI16(step1[29], step3[30]);
+          step2[31] = ADD_EPI16(step1[28], step3[31]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x16(
+              &step2[16], &step2[17], &step2[18], &step2[19], &step2[20],
+              &step2[21], &step2[22], &step2[23], &step2[24], &step2[25],
+              &step2[26], &step2[27], &step2[28], &step2[29], &step2[30],
+              &step2[31]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        // Stage 6
+        {
+          const __m128i out_04_0 = _mm_unpacklo_epi16(step2[4], step2[7]);
+          const __m128i out_04_1 = _mm_unpackhi_epi16(step2[4], step2[7]);
+          const __m128i out_20_0 = _mm_unpacklo_epi16(step2[5], step2[6]);
+          const __m128i out_20_1 = _mm_unpackhi_epi16(step2[5], step2[6]);
+          const __m128i out_12_0 = _mm_unpacklo_epi16(step2[5], step2[6]);
+          const __m128i out_12_1 = _mm_unpackhi_epi16(step2[5], step2[6]);
+          const __m128i out_28_0 = _mm_unpacklo_epi16(step2[4], step2[7]);
+          const __m128i out_28_1 = _mm_unpackhi_epi16(step2[4], step2[7]);
+          const __m128i out_04_2 = _mm_madd_epi16(out_04_0, k__cospi_p28_p04);
+          const __m128i out_04_3 = _mm_madd_epi16(out_04_1, k__cospi_p28_p04);
+          const __m128i out_20_2 = _mm_madd_epi16(out_20_0, k__cospi_p12_p20);
+          const __m128i out_20_3 = _mm_madd_epi16(out_20_1, k__cospi_p12_p20);
+          const __m128i out_12_2 = _mm_madd_epi16(out_12_0, k__cospi_m20_p12);
+          const __m128i out_12_3 = _mm_madd_epi16(out_12_1, k__cospi_m20_p12);
+          const __m128i out_28_2 = _mm_madd_epi16(out_28_0, k__cospi_m04_p28);
+          const __m128i out_28_3 = _mm_madd_epi16(out_28_1, k__cospi_m04_p28);
+          // dct_const_round_shift
+          const __m128i out_04_4 =
+              _mm_add_epi32(out_04_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_04_5 =
+              _mm_add_epi32(out_04_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_20_4 =
+              _mm_add_epi32(out_20_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_20_5 =
+              _mm_add_epi32(out_20_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_12_4 =
+              _mm_add_epi32(out_12_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_12_5 =
+              _mm_add_epi32(out_12_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_28_4 =
+              _mm_add_epi32(out_28_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_28_5 =
+              _mm_add_epi32(out_28_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_04_6 = _mm_srai_epi32(out_04_4, DCT_CONST_BITS);
+          const __m128i out_04_7 = _mm_srai_epi32(out_04_5, DCT_CONST_BITS);
+          const __m128i out_20_6 = _mm_srai_epi32(out_20_4, DCT_CONST_BITS);
+          const __m128i out_20_7 = _mm_srai_epi32(out_20_5, DCT_CONST_BITS);
+          const __m128i out_12_6 = _mm_srai_epi32(out_12_4, DCT_CONST_BITS);
+          const __m128i out_12_7 = _mm_srai_epi32(out_12_5, DCT_CONST_BITS);
+          const __m128i out_28_6 = _mm_srai_epi32(out_28_4, DCT_CONST_BITS);
+          const __m128i out_28_7 = _mm_srai_epi32(out_28_5, DCT_CONST_BITS);
+          // Combine
+          out[4] = _mm_packs_epi32(out_04_6, out_04_7);
+          out[20] = _mm_packs_epi32(out_20_6, out_20_7);
+          out[12] = _mm_packs_epi32(out_12_6, out_12_7);
+          out[28] = _mm_packs_epi32(out_28_6, out_28_7);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x4(&out[4], &out[20], &out[12], &out[28]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          step3[8] = ADD_EPI16(step2[9], step1[8]);
+          step3[9] = SUB_EPI16(step1[8], step2[9]);
+          step3[10] = SUB_EPI16(step1[11], step2[10]);
+          step3[11] = ADD_EPI16(step2[10], step1[11]);
+          step3[12] = ADD_EPI16(step2[13], step1[12]);
+          step3[13] = SUB_EPI16(step1[12], step2[13]);
+          step3[14] = SUB_EPI16(step1[15], step2[14]);
+          step3[15] = ADD_EPI16(step2[14], step1[15]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x8(&step3[8], &step3[9], &step3[10],
+                                             &step3[11], &step3[12], &step3[13],
+                                             &step3[14], &step3[15]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          const __m128i s3_17_0 = _mm_unpacklo_epi16(step2[17], step2[30]);
+          const __m128i s3_17_1 = _mm_unpackhi_epi16(step2[17], step2[30]);
+          const __m128i s3_18_0 = _mm_unpacklo_epi16(step2[18], step2[29]);
+          const __m128i s3_18_1 = _mm_unpackhi_epi16(step2[18], step2[29]);
+          const __m128i s3_21_0 = _mm_unpacklo_epi16(step2[21], step2[26]);
+          const __m128i s3_21_1 = _mm_unpackhi_epi16(step2[21], step2[26]);
+          const __m128i s3_22_0 = _mm_unpacklo_epi16(step2[22], step2[25]);
+          const __m128i s3_22_1 = _mm_unpackhi_epi16(step2[22], step2[25]);
+          const __m128i s3_17_2 = _mm_madd_epi16(s3_17_0, k__cospi_m04_p28);
+          const __m128i s3_17_3 = _mm_madd_epi16(s3_17_1, k__cospi_m04_p28);
+          const __m128i s3_18_2 = _mm_madd_epi16(s3_18_0, k__cospi_m28_m04);
+          const __m128i s3_18_3 = _mm_madd_epi16(s3_18_1, k__cospi_m28_m04);
+          const __m128i s3_21_2 = _mm_madd_epi16(s3_21_0, k__cospi_m20_p12);
+          const __m128i s3_21_3 = _mm_madd_epi16(s3_21_1, k__cospi_m20_p12);
+          const __m128i s3_22_2 = _mm_madd_epi16(s3_22_0, k__cospi_m12_m20);
+          const __m128i s3_22_3 = _mm_madd_epi16(s3_22_1, k__cospi_m12_m20);
+          const __m128i s3_25_2 = _mm_madd_epi16(s3_22_0, k__cospi_m20_p12);
+          const __m128i s3_25_3 = _mm_madd_epi16(s3_22_1, k__cospi_m20_p12);
+          const __m128i s3_26_2 = _mm_madd_epi16(s3_21_0, k__cospi_p12_p20);
+          const __m128i s3_26_3 = _mm_madd_epi16(s3_21_1, k__cospi_p12_p20);
+          const __m128i s3_29_2 = _mm_madd_epi16(s3_18_0, k__cospi_m04_p28);
+          const __m128i s3_29_3 = _mm_madd_epi16(s3_18_1, k__cospi_m04_p28);
+          const __m128i s3_30_2 = _mm_madd_epi16(s3_17_0, k__cospi_p28_p04);
+          const __m128i s3_30_3 = _mm_madd_epi16(s3_17_1, k__cospi_p28_p04);
+          // dct_const_round_shift
+          const __m128i s3_17_4 = _mm_add_epi32(s3_17_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_17_5 = _mm_add_epi32(s3_17_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_18_4 = _mm_add_epi32(s3_18_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_18_5 = _mm_add_epi32(s3_18_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_21_4 = _mm_add_epi32(s3_21_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_21_5 = _mm_add_epi32(s3_21_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_22_4 = _mm_add_epi32(s3_22_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_22_5 = _mm_add_epi32(s3_22_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_17_6 = _mm_srai_epi32(s3_17_4, DCT_CONST_BITS);
+          const __m128i s3_17_7 = _mm_srai_epi32(s3_17_5, DCT_CONST_BITS);
+          const __m128i s3_18_6 = _mm_srai_epi32(s3_18_4, DCT_CONST_BITS);
+          const __m128i s3_18_7 = _mm_srai_epi32(s3_18_5, DCT_CONST_BITS);
+          const __m128i s3_21_6 = _mm_srai_epi32(s3_21_4, DCT_CONST_BITS);
+          const __m128i s3_21_7 = _mm_srai_epi32(s3_21_5, DCT_CONST_BITS);
+          const __m128i s3_22_6 = _mm_srai_epi32(s3_22_4, DCT_CONST_BITS);
+          const __m128i s3_22_7 = _mm_srai_epi32(s3_22_5, DCT_CONST_BITS);
+          const __m128i s3_25_4 = _mm_add_epi32(s3_25_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_25_5 = _mm_add_epi32(s3_25_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_26_4 = _mm_add_epi32(s3_26_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_26_5 = _mm_add_epi32(s3_26_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_29_4 = _mm_add_epi32(s3_29_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_29_5 = _mm_add_epi32(s3_29_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_30_4 = _mm_add_epi32(s3_30_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_30_5 = _mm_add_epi32(s3_30_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_25_6 = _mm_srai_epi32(s3_25_4, DCT_CONST_BITS);
+          const __m128i s3_25_7 = _mm_srai_epi32(s3_25_5, DCT_CONST_BITS);
+          const __m128i s3_26_6 = _mm_srai_epi32(s3_26_4, DCT_CONST_BITS);
+          const __m128i s3_26_7 = _mm_srai_epi32(s3_26_5, DCT_CONST_BITS);
+          const __m128i s3_29_6 = _mm_srai_epi32(s3_29_4, DCT_CONST_BITS);
+          const __m128i s3_29_7 = _mm_srai_epi32(s3_29_5, DCT_CONST_BITS);
+          const __m128i s3_30_6 = _mm_srai_epi32(s3_30_4, DCT_CONST_BITS);
+          const __m128i s3_30_7 = _mm_srai_epi32(s3_30_5, DCT_CONST_BITS);
+          // Combine
+          step3[17] = _mm_packs_epi32(s3_17_6, s3_17_7);
+          step3[18] = _mm_packs_epi32(s3_18_6, s3_18_7);
+          step3[21] = _mm_packs_epi32(s3_21_6, s3_21_7);
+          step3[22] = _mm_packs_epi32(s3_22_6, s3_22_7);
+          step3[25] = _mm_packs_epi32(s3_25_6, s3_25_7);
+          step3[26] = _mm_packs_epi32(s3_26_6, s3_26_7);
+          step3[29] = _mm_packs_epi32(s3_29_6, s3_29_7);
+          step3[30] = _mm_packs_epi32(s3_30_6, s3_30_7);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x8(&step3[17], &step3[18], &step3[21],
+                                             &step3[22], &step3[25], &step3[26],
+                                             &step3[29], &step3[30]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        // Stage 7
+        {
+          const __m128i out_02_0 = _mm_unpacklo_epi16(step3[8], step3[15]);
+          const __m128i out_02_1 = _mm_unpackhi_epi16(step3[8], step3[15]);
+          const __m128i out_18_0 = _mm_unpacklo_epi16(step3[9], step3[14]);
+          const __m128i out_18_1 = _mm_unpackhi_epi16(step3[9], step3[14]);
+          const __m128i out_10_0 = _mm_unpacklo_epi16(step3[10], step3[13]);
+          const __m128i out_10_1 = _mm_unpackhi_epi16(step3[10], step3[13]);
+          const __m128i out_26_0 = _mm_unpacklo_epi16(step3[11], step3[12]);
+          const __m128i out_26_1 = _mm_unpackhi_epi16(step3[11], step3[12]);
+          const __m128i out_02_2 = _mm_madd_epi16(out_02_0, k__cospi_p30_p02);
+          const __m128i out_02_3 = _mm_madd_epi16(out_02_1, k__cospi_p30_p02);
+          const __m128i out_18_2 = _mm_madd_epi16(out_18_0, k__cospi_p14_p18);
+          const __m128i out_18_3 = _mm_madd_epi16(out_18_1, k__cospi_p14_p18);
+          const __m128i out_10_2 = _mm_madd_epi16(out_10_0, k__cospi_p22_p10);
+          const __m128i out_10_3 = _mm_madd_epi16(out_10_1, k__cospi_p22_p10);
+          const __m128i out_26_2 = _mm_madd_epi16(out_26_0, k__cospi_p06_p26);
+          const __m128i out_26_3 = _mm_madd_epi16(out_26_1, k__cospi_p06_p26);
+          const __m128i out_06_2 = _mm_madd_epi16(out_26_0, k__cospi_m26_p06);
+          const __m128i out_06_3 = _mm_madd_epi16(out_26_1, k__cospi_m26_p06);
+          const __m128i out_22_2 = _mm_madd_epi16(out_10_0, k__cospi_m10_p22);
+          const __m128i out_22_3 = _mm_madd_epi16(out_10_1, k__cospi_m10_p22);
+          const __m128i out_14_2 = _mm_madd_epi16(out_18_0, k__cospi_m18_p14);
+          const __m128i out_14_3 = _mm_madd_epi16(out_18_1, k__cospi_m18_p14);
+          const __m128i out_30_2 = _mm_madd_epi16(out_02_0, k__cospi_m02_p30);
+          const __m128i out_30_3 = _mm_madd_epi16(out_02_1, k__cospi_m02_p30);
+          // dct_const_round_shift
+          const __m128i out_02_4 =
+              _mm_add_epi32(out_02_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_02_5 =
+              _mm_add_epi32(out_02_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_18_4 =
+              _mm_add_epi32(out_18_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_18_5 =
+              _mm_add_epi32(out_18_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_10_4 =
+              _mm_add_epi32(out_10_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_10_5 =
+              _mm_add_epi32(out_10_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_26_4 =
+              _mm_add_epi32(out_26_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_26_5 =
+              _mm_add_epi32(out_26_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_06_4 =
+              _mm_add_epi32(out_06_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_06_5 =
+              _mm_add_epi32(out_06_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_22_4 =
+              _mm_add_epi32(out_22_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_22_5 =
+              _mm_add_epi32(out_22_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_14_4 =
+              _mm_add_epi32(out_14_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_14_5 =
+              _mm_add_epi32(out_14_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_30_4 =
+              _mm_add_epi32(out_30_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_30_5 =
+              _mm_add_epi32(out_30_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_02_6 = _mm_srai_epi32(out_02_4, DCT_CONST_BITS);
+          const __m128i out_02_7 = _mm_srai_epi32(out_02_5, DCT_CONST_BITS);
+          const __m128i out_18_6 = _mm_srai_epi32(out_18_4, DCT_CONST_BITS);
+          const __m128i out_18_7 = _mm_srai_epi32(out_18_5, DCT_CONST_BITS);
+          const __m128i out_10_6 = _mm_srai_epi32(out_10_4, DCT_CONST_BITS);
+          const __m128i out_10_7 = _mm_srai_epi32(out_10_5, DCT_CONST_BITS);
+          const __m128i out_26_6 = _mm_srai_epi32(out_26_4, DCT_CONST_BITS);
+          const __m128i out_26_7 = _mm_srai_epi32(out_26_5, DCT_CONST_BITS);
+          const __m128i out_06_6 = _mm_srai_epi32(out_06_4, DCT_CONST_BITS);
+          const __m128i out_06_7 = _mm_srai_epi32(out_06_5, DCT_CONST_BITS);
+          const __m128i out_22_6 = _mm_srai_epi32(out_22_4, DCT_CONST_BITS);
+          const __m128i out_22_7 = _mm_srai_epi32(out_22_5, DCT_CONST_BITS);
+          const __m128i out_14_6 = _mm_srai_epi32(out_14_4, DCT_CONST_BITS);
+          const __m128i out_14_7 = _mm_srai_epi32(out_14_5, DCT_CONST_BITS);
+          const __m128i out_30_6 = _mm_srai_epi32(out_30_4, DCT_CONST_BITS);
+          const __m128i out_30_7 = _mm_srai_epi32(out_30_5, DCT_CONST_BITS);
+          // Combine
+          out[2] = _mm_packs_epi32(out_02_6, out_02_7);
+          out[18] = _mm_packs_epi32(out_18_6, out_18_7);
+          out[10] = _mm_packs_epi32(out_10_6, out_10_7);
+          out[26] = _mm_packs_epi32(out_26_6, out_26_7);
+          out[6] = _mm_packs_epi32(out_06_6, out_06_7);
+          out[22] = _mm_packs_epi32(out_22_6, out_22_7);
+          out[14] = _mm_packs_epi32(out_14_6, out_14_7);
+          out[30] = _mm_packs_epi32(out_30_6, out_30_7);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x8(&out[2], &out[18], &out[10], &out[26],
+                                      &out[6], &out[22], &out[14], &out[30]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          step1[16] = ADD_EPI16(step3[17], step2[16]);
+          step1[17] = SUB_EPI16(step2[16], step3[17]);
+          step1[18] = SUB_EPI16(step2[19], step3[18]);
+          step1[19] = ADD_EPI16(step3[18], step2[19]);
+          step1[20] = ADD_EPI16(step3[21], step2[20]);
+          step1[21] = SUB_EPI16(step2[20], step3[21]);
+          step1[22] = SUB_EPI16(step2[23], step3[22]);
+          step1[23] = ADD_EPI16(step3[22], step2[23]);
+          step1[24] = ADD_EPI16(step3[25], step2[24]);
+          step1[25] = SUB_EPI16(step2[24], step3[25]);
+          step1[26] = SUB_EPI16(step2[27], step3[26]);
+          step1[27] = ADD_EPI16(step3[26], step2[27]);
+          step1[28] = ADD_EPI16(step3[29], step2[28]);
+          step1[29] = SUB_EPI16(step2[28], step3[29]);
+          step1[30] = SUB_EPI16(step2[31], step3[30]);
+          step1[31] = ADD_EPI16(step3[30], step2[31]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x16(
+              &step1[16], &step1[17], &step1[18], &step1[19], &step1[20],
+              &step1[21], &step1[22], &step1[23], &step1[24], &step1[25],
+              &step1[26], &step1[27], &step1[28], &step1[29], &step1[30],
+              &step1[31]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        // Final stage: output indices are bit-reversed.
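+        // e.g. butterfly position 16 (10000b) lands in out[1] (00001b),
+        // position 18 (10010b) in out[9] (01001b), and so on.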
+        {
+          const __m128i out_01_0 = _mm_unpacklo_epi16(step1[16], step1[31]);
+          const __m128i out_01_1 = _mm_unpackhi_epi16(step1[16], step1[31]);
+          const __m128i out_17_0 = _mm_unpacklo_epi16(step1[17], step1[30]);
+          const __m128i out_17_1 = _mm_unpackhi_epi16(step1[17], step1[30]);
+          const __m128i out_09_0 = _mm_unpacklo_epi16(step1[18], step1[29]);
+          const __m128i out_09_1 = _mm_unpackhi_epi16(step1[18], step1[29]);
+          const __m128i out_25_0 = _mm_unpacklo_epi16(step1[19], step1[28]);
+          const __m128i out_25_1 = _mm_unpackhi_epi16(step1[19], step1[28]);
+          const __m128i out_01_2 = _mm_madd_epi16(out_01_0, k__cospi_p31_p01);
+          const __m128i out_01_3 = _mm_madd_epi16(out_01_1, k__cospi_p31_p01);
+          const __m128i out_17_2 = _mm_madd_epi16(out_17_0, k__cospi_p15_p17);
+          const __m128i out_17_3 = _mm_madd_epi16(out_17_1, k__cospi_p15_p17);
+          const __m128i out_09_2 = _mm_madd_epi16(out_09_0, k__cospi_p23_p09);
+          const __m128i out_09_3 = _mm_madd_epi16(out_09_1, k__cospi_p23_p09);
+          const __m128i out_25_2 = _mm_madd_epi16(out_25_0, k__cospi_p07_p25);
+          const __m128i out_25_3 = _mm_madd_epi16(out_25_1, k__cospi_p07_p25);
+          const __m128i out_07_2 = _mm_madd_epi16(out_25_0, k__cospi_m25_p07);
+          const __m128i out_07_3 = _mm_madd_epi16(out_25_1, k__cospi_m25_p07);
+          const __m128i out_23_2 = _mm_madd_epi16(out_09_0, k__cospi_m09_p23);
+          const __m128i out_23_3 = _mm_madd_epi16(out_09_1, k__cospi_m09_p23);
+          const __m128i out_15_2 = _mm_madd_epi16(out_17_0, k__cospi_m17_p15);
+          const __m128i out_15_3 = _mm_madd_epi16(out_17_1, k__cospi_m17_p15);
+          const __m128i out_31_2 = _mm_madd_epi16(out_01_0, k__cospi_m01_p31);
+          const __m128i out_31_3 = _mm_madd_epi16(out_01_1, k__cospi_m01_p31);
+          // dct_const_round_shift
+          const __m128i out_01_4 =
+              _mm_add_epi32(out_01_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_01_5 =
+              _mm_add_epi32(out_01_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_17_4 =
+              _mm_add_epi32(out_17_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_17_5 =
+              _mm_add_epi32(out_17_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_09_4 =
+              _mm_add_epi32(out_09_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_09_5 =
+              _mm_add_epi32(out_09_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_25_4 =
+              _mm_add_epi32(out_25_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_25_5 =
+              _mm_add_epi32(out_25_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_07_4 =
+              _mm_add_epi32(out_07_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_07_5 =
+              _mm_add_epi32(out_07_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_23_4 =
+              _mm_add_epi32(out_23_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_23_5 =
+              _mm_add_epi32(out_23_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_15_4 =
+              _mm_add_epi32(out_15_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_15_5 =
+              _mm_add_epi32(out_15_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_31_4 =
+              _mm_add_epi32(out_31_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_31_5 =
+              _mm_add_epi32(out_31_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_01_6 = _mm_srai_epi32(out_01_4, DCT_CONST_BITS);
+          const __m128i out_01_7 = _mm_srai_epi32(out_01_5, DCT_CONST_BITS);
+          const __m128i out_17_6 = _mm_srai_epi32(out_17_4, DCT_CONST_BITS);
+          const __m128i out_17_7 = _mm_srai_epi32(out_17_5, DCT_CONST_BITS);
+          const __m128i out_09_6 = _mm_srai_epi32(out_09_4, DCT_CONST_BITS);
+          const __m128i out_09_7 = _mm_srai_epi32(out_09_5, DCT_CONST_BITS);
+          const __m128i out_25_6 = _mm_srai_epi32(out_25_4, DCT_CONST_BITS);
+          const __m128i out_25_7 = _mm_srai_epi32(out_25_5, DCT_CONST_BITS);
+          const __m128i out_07_6 = _mm_srai_epi32(out_07_4, DCT_CONST_BITS);
+          const __m128i out_07_7 = _mm_srai_epi32(out_07_5, DCT_CONST_BITS);
+          const __m128i out_23_6 = _mm_srai_epi32(out_23_4, DCT_CONST_BITS);
+          const __m128i out_23_7 = _mm_srai_epi32(out_23_5, DCT_CONST_BITS);
+          const __m128i out_15_6 = _mm_srai_epi32(out_15_4, DCT_CONST_BITS);
+          const __m128i out_15_7 = _mm_srai_epi32(out_15_5, DCT_CONST_BITS);
+          const __m128i out_31_6 = _mm_srai_epi32(out_31_4, DCT_CONST_BITS);
+          const __m128i out_31_7 = _mm_srai_epi32(out_31_5, DCT_CONST_BITS);
+          // Combine
+          out[1] = _mm_packs_epi32(out_01_6, out_01_7);
+          out[17] = _mm_packs_epi32(out_17_6, out_17_7);
+          out[9] = _mm_packs_epi32(out_09_6, out_09_7);
+          out[25] = _mm_packs_epi32(out_25_6, out_25_7);
+          out[7] = _mm_packs_epi32(out_07_6, out_07_7);
+          out[23] = _mm_packs_epi32(out_23_6, out_23_7);
+          out[15] = _mm_packs_epi32(out_15_6, out_15_7);
+          out[31] = _mm_packs_epi32(out_31_6, out_31_7);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x8(&out[1], &out[17], &out[9], &out[25],
+                                      &out[7], &out[23], &out[15], &out[31]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          const __m128i out_05_0 = _mm_unpacklo_epi16(step1[20], step1[27]);
+          const __m128i out_05_1 = _mm_unpackhi_epi16(step1[20], step1[27]);
+          const __m128i out_21_0 = _mm_unpacklo_epi16(step1[21], step1[26]);
+          const __m128i out_21_1 = _mm_unpackhi_epi16(step1[21], step1[26]);
+          const __m128i out_13_0 = _mm_unpacklo_epi16(step1[22], step1[25]);
+          const __m128i out_13_1 = _mm_unpackhi_epi16(step1[22], step1[25]);
+          const __m128i out_29_0 = _mm_unpacklo_epi16(step1[23], step1[24]);
+          const __m128i out_29_1 = _mm_unpackhi_epi16(step1[23], step1[24]);
+          const __m128i out_05_2 = _mm_madd_epi16(out_05_0, k__cospi_p27_p05);
+          const __m128i out_05_3 = _mm_madd_epi16(out_05_1, k__cospi_p27_p05);
+          const __m128i out_21_2 = _mm_madd_epi16(out_21_0, k__cospi_p11_p21);
+          const __m128i out_21_3 = _mm_madd_epi16(out_21_1, k__cospi_p11_p21);
+          const __m128i out_13_2 = _mm_madd_epi16(out_13_0, k__cospi_p19_p13);
+          const __m128i out_13_3 = _mm_madd_epi16(out_13_1, k__cospi_p19_p13);
+          const __m128i out_29_2 = _mm_madd_epi16(out_29_0, k__cospi_p03_p29);
+          const __m128i out_29_3 = _mm_madd_epi16(out_29_1, k__cospi_p03_p29);
+          const __m128i out_03_2 = _mm_madd_epi16(out_29_0, k__cospi_m29_p03);
+          const __m128i out_03_3 = _mm_madd_epi16(out_29_1, k__cospi_m29_p03);
+          const __m128i out_19_2 = _mm_madd_epi16(out_13_0, k__cospi_m13_p19);
+          const __m128i out_19_3 = _mm_madd_epi16(out_13_1, k__cospi_m13_p19);
+          const __m128i out_11_2 = _mm_madd_epi16(out_21_0, k__cospi_m21_p11);
+          const __m128i out_11_3 = _mm_madd_epi16(out_21_1, k__cospi_m21_p11);
+          const __m128i out_27_2 = _mm_madd_epi16(out_05_0, k__cospi_m05_p27);
+          const __m128i out_27_3 = _mm_madd_epi16(out_05_1, k__cospi_m05_p27);
+          // dct_const_round_shift
+          const __m128i out_05_4 =
+              _mm_add_epi32(out_05_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_05_5 =
+              _mm_add_epi32(out_05_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_21_4 =
+              _mm_add_epi32(out_21_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_21_5 =
+              _mm_add_epi32(out_21_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_13_4 =
+              _mm_add_epi32(out_13_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_13_5 =
+              _mm_add_epi32(out_13_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_29_4 =
+              _mm_add_epi32(out_29_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_29_5 =
+              _mm_add_epi32(out_29_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_03_4 =
+              _mm_add_epi32(out_03_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_03_5 =
+              _mm_add_epi32(out_03_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_19_4 =
+              _mm_add_epi32(out_19_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_19_5 =
+              _mm_add_epi32(out_19_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_11_4 =
+              _mm_add_epi32(out_11_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_11_5 =
+              _mm_add_epi32(out_11_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_27_4 =
+              _mm_add_epi32(out_27_2, k__DCT_CONST_ROUNDING);
+          const __m128i out_27_5 =
+              _mm_add_epi32(out_27_3, k__DCT_CONST_ROUNDING);
+          const __m128i out_05_6 = _mm_srai_epi32(out_05_4, DCT_CONST_BITS);
+          const __m128i out_05_7 = _mm_srai_epi32(out_05_5, DCT_CONST_BITS);
+          const __m128i out_21_6 = _mm_srai_epi32(out_21_4, DCT_CONST_BITS);
+          const __m128i out_21_7 = _mm_srai_epi32(out_21_5, DCT_CONST_BITS);
+          const __m128i out_13_6 = _mm_srai_epi32(out_13_4, DCT_CONST_BITS);
+          const __m128i out_13_7 = _mm_srai_epi32(out_13_5, DCT_CONST_BITS);
+          const __m128i out_29_6 = _mm_srai_epi32(out_29_4, DCT_CONST_BITS);
+          const __m128i out_29_7 = _mm_srai_epi32(out_29_5, DCT_CONST_BITS);
+          const __m128i out_03_6 = _mm_srai_epi32(out_03_4, DCT_CONST_BITS);
+          const __m128i out_03_7 = _mm_srai_epi32(out_03_5, DCT_CONST_BITS);
+          const __m128i out_19_6 = _mm_srai_epi32(out_19_4, DCT_CONST_BITS);
+          const __m128i out_19_7 = _mm_srai_epi32(out_19_5, DCT_CONST_BITS);
+          const __m128i out_11_6 = _mm_srai_epi32(out_11_4, DCT_CONST_BITS);
+          const __m128i out_11_7 = _mm_srai_epi32(out_11_5, DCT_CONST_BITS);
+          const __m128i out_27_6 = _mm_srai_epi32(out_27_4, DCT_CONST_BITS);
+          const __m128i out_27_7 = _mm_srai_epi32(out_27_5, DCT_CONST_BITS);
+          // Combine
+          out[5] = _mm_packs_epi32(out_05_6, out_05_7);
+          out[21] = _mm_packs_epi32(out_21_6, out_21_7);
+          out[13] = _mm_packs_epi32(out_13_6, out_13_7);
+          out[29] = _mm_packs_epi32(out_29_6, out_29_7);
+          out[3] = _mm_packs_epi32(out_03_6, out_03_7);
+          out[19] = _mm_packs_epi32(out_19_6, out_19_7);
+          out[11] = _mm_packs_epi32(out_11_6, out_11_7);
+          out[27] = _mm_packs_epi32(out_27_6, out_27_7);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x8(&out[5], &out[21], &out[13], &out[29],
+                                      &out[3], &out[19], &out[11], &out[27]);
+          if (overflow) {
+            if (pass == 0)
+              HIGH_FDCT32x32_2D_C(input, output_org, stride);
+            else
+              HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+#if FDCT32x32_HIGH_PRECISION
+      } else {
+        __m128i lstep1[64], lstep2[64], lstep3[64];
+        __m128i u[32], v[32], sign[16];
+        const __m128i K32One = _mm_set_epi32(1, 1, 1, 1);
+        // start using 32-bit operations
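+        // The high-precision branch widens everything to 32 bits so the
+        // remaining stages keep full intermediate precision.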
+        // stage 3
+        {
+          // expand to 32-bit precision prior to the addition operations
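+          // unpacking with kZero interleaves each int16 with zero; the
+          // subsequent madd with kOne sums each {x, 0} pair as signed 16-bit
+          // values, i.e. it sign-extends x into a 32-bit lane.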
+          lstep2[0] = _mm_unpacklo_epi16(step2[0], kZero);
+          lstep2[1] = _mm_unpackhi_epi16(step2[0], kZero);
+          lstep2[2] = _mm_unpacklo_epi16(step2[1], kZero);
+          lstep2[3] = _mm_unpackhi_epi16(step2[1], kZero);
+          lstep2[4] = _mm_unpacklo_epi16(step2[2], kZero);
+          lstep2[5] = _mm_unpackhi_epi16(step2[2], kZero);
+          lstep2[6] = _mm_unpacklo_epi16(step2[3], kZero);
+          lstep2[7] = _mm_unpackhi_epi16(step2[3], kZero);
+          lstep2[8] = _mm_unpacklo_epi16(step2[4], kZero);
+          lstep2[9] = _mm_unpackhi_epi16(step2[4], kZero);
+          lstep2[10] = _mm_unpacklo_epi16(step2[5], kZero);
+          lstep2[11] = _mm_unpackhi_epi16(step2[5], kZero);
+          lstep2[12] = _mm_unpacklo_epi16(step2[6], kZero);
+          lstep2[13] = _mm_unpackhi_epi16(step2[6], kZero);
+          lstep2[14] = _mm_unpacklo_epi16(step2[7], kZero);
+          lstep2[15] = _mm_unpackhi_epi16(step2[7], kZero);
+          lstep2[0] = _mm_madd_epi16(lstep2[0], kOne);
+          lstep2[1] = _mm_madd_epi16(lstep2[1], kOne);
+          lstep2[2] = _mm_madd_epi16(lstep2[2], kOne);
+          lstep2[3] = _mm_madd_epi16(lstep2[3], kOne);
+          lstep2[4] = _mm_madd_epi16(lstep2[4], kOne);
+          lstep2[5] = _mm_madd_epi16(lstep2[5], kOne);
+          lstep2[6] = _mm_madd_epi16(lstep2[6], kOne);
+          lstep2[7] = _mm_madd_epi16(lstep2[7], kOne);
+          lstep2[8] = _mm_madd_epi16(lstep2[8], kOne);
+          lstep2[9] = _mm_madd_epi16(lstep2[9], kOne);
+          lstep2[10] = _mm_madd_epi16(lstep2[10], kOne);
+          lstep2[11] = _mm_madd_epi16(lstep2[11], kOne);
+          lstep2[12] = _mm_madd_epi16(lstep2[12], kOne);
+          lstep2[13] = _mm_madd_epi16(lstep2[13], kOne);
+          lstep2[14] = _mm_madd_epi16(lstep2[14], kOne);
+          lstep2[15] = _mm_madd_epi16(lstep2[15], kOne);
+
+          lstep3[0] = _mm_add_epi32(lstep2[14], lstep2[0]);
+          lstep3[1] = _mm_add_epi32(lstep2[15], lstep2[1]);
+          lstep3[2] = _mm_add_epi32(lstep2[12], lstep2[2]);
+          lstep3[3] = _mm_add_epi32(lstep2[13], lstep2[3]);
+          lstep3[4] = _mm_add_epi32(lstep2[10], lstep2[4]);
+          lstep3[5] = _mm_add_epi32(lstep2[11], lstep2[5]);
+          lstep3[6] = _mm_add_epi32(lstep2[8], lstep2[6]);
+          lstep3[7] = _mm_add_epi32(lstep2[9], lstep2[7]);
+          lstep3[8] = _mm_sub_epi32(lstep2[6], lstep2[8]);
+          lstep3[9] = _mm_sub_epi32(lstep2[7], lstep2[9]);
+          lstep3[10] = _mm_sub_epi32(lstep2[4], lstep2[10]);
+          lstep3[11] = _mm_sub_epi32(lstep2[5], lstep2[11]);
+          lstep3[12] = _mm_sub_epi32(lstep2[2], lstep2[12]);
+          lstep3[13] = _mm_sub_epi32(lstep2[3], lstep2[13]);
+          lstep3[14] = _mm_sub_epi32(lstep2[0], lstep2[14]);
+          lstep3[15] = _mm_sub_epi32(lstep2[1], lstep2[15]);
+        }
+        {
+          const __m128i s3_10_0 = _mm_unpacklo_epi16(step2[13], step2[10]);
+          const __m128i s3_10_1 = _mm_unpackhi_epi16(step2[13], step2[10]);
+          const __m128i s3_11_0 = _mm_unpacklo_epi16(step2[12], step2[11]);
+          const __m128i s3_11_1 = _mm_unpackhi_epi16(step2[12], step2[11]);
+          const __m128i s3_10_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_m16);
+          const __m128i s3_10_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_m16);
+          const __m128i s3_11_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_m16);
+          const __m128i s3_11_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_m16);
+          const __m128i s3_12_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_p16);
+          const __m128i s3_12_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_p16);
+          const __m128i s3_13_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_p16);
+          const __m128i s3_13_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_p16);
+          // dct_const_round_shift
+          const __m128i s3_10_4 = _mm_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_10_5 = _mm_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_11_4 = _mm_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_11_5 = _mm_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_12_4 = _mm_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_12_5 = _mm_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING);
+          const __m128i s3_13_4 = _mm_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING);
+          const __m128i s3_13_5 = _mm_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING);
+          lstep3[20] = _mm_srai_epi32(s3_10_4, DCT_CONST_BITS);
+          lstep3[21] = _mm_srai_epi32(s3_10_5, DCT_CONST_BITS);
+          lstep3[22] = _mm_srai_epi32(s3_11_4, DCT_CONST_BITS);
+          lstep3[23] = _mm_srai_epi32(s3_11_5, DCT_CONST_BITS);
+          lstep3[24] = _mm_srai_epi32(s3_12_4, DCT_CONST_BITS);
+          lstep3[25] = _mm_srai_epi32(s3_12_5, DCT_CONST_BITS);
+          lstep3[26] = _mm_srai_epi32(s3_13_4, DCT_CONST_BITS);
+          lstep3[27] = _mm_srai_epi32(s3_13_5, DCT_CONST_BITS);
+        }
+        {
+          lstep2[40] = _mm_unpacklo_epi16(step2[20], kZero);
+          lstep2[41] = _mm_unpackhi_epi16(step2[20], kZero);
+          lstep2[42] = _mm_unpacklo_epi16(step2[21], kZero);
+          lstep2[43] = _mm_unpackhi_epi16(step2[21], kZero);
+          lstep2[44] = _mm_unpacklo_epi16(step2[22], kZero);
+          lstep2[45] = _mm_unpackhi_epi16(step2[22], kZero);
+          lstep2[46] = _mm_unpacklo_epi16(step2[23], kZero);
+          lstep2[47] = _mm_unpackhi_epi16(step2[23], kZero);
+          lstep2[48] = _mm_unpacklo_epi16(step2[24], kZero);
+          lstep2[49] = _mm_unpackhi_epi16(step2[24], kZero);
+          lstep2[50] = _mm_unpacklo_epi16(step2[25], kZero);
+          lstep2[51] = _mm_unpackhi_epi16(step2[25], kZero);
+          lstep2[52] = _mm_unpacklo_epi16(step2[26], kZero);
+          lstep2[53] = _mm_unpackhi_epi16(step2[26], kZero);
+          lstep2[54] = _mm_unpacklo_epi16(step2[27], kZero);
+          lstep2[55] = _mm_unpackhi_epi16(step2[27], kZero);
+          lstep2[40] = _mm_madd_epi16(lstep2[40], kOne);
+          lstep2[41] = _mm_madd_epi16(lstep2[41], kOne);
+          lstep2[42] = _mm_madd_epi16(lstep2[42], kOne);
+          lstep2[43] = _mm_madd_epi16(lstep2[43], kOne);
+          lstep2[44] = _mm_madd_epi16(lstep2[44], kOne);
+          lstep2[45] = _mm_madd_epi16(lstep2[45], kOne);
+          lstep2[46] = _mm_madd_epi16(lstep2[46], kOne);
+          lstep2[47] = _mm_madd_epi16(lstep2[47], kOne);
+          lstep2[48] = _mm_madd_epi16(lstep2[48], kOne);
+          lstep2[49] = _mm_madd_epi16(lstep2[49], kOne);
+          lstep2[50] = _mm_madd_epi16(lstep2[50], kOne);
+          lstep2[51] = _mm_madd_epi16(lstep2[51], kOne);
+          lstep2[52] = _mm_madd_epi16(lstep2[52], kOne);
+          lstep2[53] = _mm_madd_epi16(lstep2[53], kOne);
+          lstep2[54] = _mm_madd_epi16(lstep2[54], kOne);
+          lstep2[55] = _mm_madd_epi16(lstep2[55], kOne);
+
+          lstep1[32] = _mm_unpacklo_epi16(step1[16], kZero);
+          lstep1[33] = _mm_unpackhi_epi16(step1[16], kZero);
+          lstep1[34] = _mm_unpacklo_epi16(step1[17], kZero);
+          lstep1[35] = _mm_unpackhi_epi16(step1[17], kZero);
+          lstep1[36] = _mm_unpacklo_epi16(step1[18], kZero);
+          lstep1[37] = _mm_unpackhi_epi16(step1[18], kZero);
+          lstep1[38] = _mm_unpacklo_epi16(step1[19], kZero);
+          lstep1[39] = _mm_unpackhi_epi16(step1[19], kZero);
+          lstep1[56] = _mm_unpacklo_epi16(step1[28], kZero);
+          lstep1[57] = _mm_unpackhi_epi16(step1[28], kZero);
+          lstep1[58] = _mm_unpacklo_epi16(step1[29], kZero);
+          lstep1[59] = _mm_unpackhi_epi16(step1[29], kZero);
+          lstep1[60] = _mm_unpacklo_epi16(step1[30], kZero);
+          lstep1[61] = _mm_unpackhi_epi16(step1[30], kZero);
+          lstep1[62] = _mm_unpacklo_epi16(step1[31], kZero);
+          lstep1[63] = _mm_unpackhi_epi16(step1[31], kZero);
+          lstep1[32] = _mm_madd_epi16(lstep1[32], kOne);
+          lstep1[33] = _mm_madd_epi16(lstep1[33], kOne);
+          lstep1[34] = _mm_madd_epi16(lstep1[34], kOne);
+          lstep1[35] = _mm_madd_epi16(lstep1[35], kOne);
+          lstep1[36] = _mm_madd_epi16(lstep1[36], kOne);
+          lstep1[37] = _mm_madd_epi16(lstep1[37], kOne);
+          lstep1[38] = _mm_madd_epi16(lstep1[38], kOne);
+          lstep1[39] = _mm_madd_epi16(lstep1[39], kOne);
+          lstep1[56] = _mm_madd_epi16(lstep1[56], kOne);
+          lstep1[57] = _mm_madd_epi16(lstep1[57], kOne);
+          lstep1[58] = _mm_madd_epi16(lstep1[58], kOne);
+          lstep1[59] = _mm_madd_epi16(lstep1[59], kOne);
+          lstep1[60] = _mm_madd_epi16(lstep1[60], kOne);
+          lstep1[61] = _mm_madd_epi16(lstep1[61], kOne);
+          lstep1[62] = _mm_madd_epi16(lstep1[62], kOne);
+          lstep1[63] = _mm_madd_epi16(lstep1[63], kOne);
+
+          lstep3[32] = _mm_add_epi32(lstep2[46], lstep1[32]);
+          lstep3[33] = _mm_add_epi32(lstep2[47], lstep1[33]);
+          lstep3[34] = _mm_add_epi32(lstep2[44], lstep1[34]);
+          lstep3[35] = _mm_add_epi32(lstep2[45], lstep1[35]);
+          lstep3[36] = _mm_add_epi32(lstep2[42], lstep1[36]);
+          lstep3[37] = _mm_add_epi32(lstep2[43], lstep1[37]);
+          lstep3[38] = _mm_add_epi32(lstep2[40], lstep1[38]);
+          lstep3[39] = _mm_add_epi32(lstep2[41], lstep1[39]);
+          lstep3[40] = _mm_sub_epi32(lstep1[38], lstep2[40]);
+          lstep3[41] = _mm_sub_epi32(lstep1[39], lstep2[41]);
+          lstep3[42] = _mm_sub_epi32(lstep1[36], lstep2[42]);
+          lstep3[43] = _mm_sub_epi32(lstep1[37], lstep2[43]);
+          lstep3[44] = _mm_sub_epi32(lstep1[34], lstep2[44]);
+          lstep3[45] = _mm_sub_epi32(lstep1[35], lstep2[45]);
+          lstep3[46] = _mm_sub_epi32(lstep1[32], lstep2[46]);
+          lstep3[47] = _mm_sub_epi32(lstep1[33], lstep2[47]);
+          lstep3[48] = _mm_sub_epi32(lstep1[62], lstep2[48]);
+          lstep3[49] = _mm_sub_epi32(lstep1[63], lstep2[49]);
+          lstep3[50] = _mm_sub_epi32(lstep1[60], lstep2[50]);
+          lstep3[51] = _mm_sub_epi32(lstep1[61], lstep2[51]);
+          lstep3[52] = _mm_sub_epi32(lstep1[58], lstep2[52]);
+          lstep3[53] = _mm_sub_epi32(lstep1[59], lstep2[53]);
+          lstep3[54] = _mm_sub_epi32(lstep1[56], lstep2[54]);
+          lstep3[55] = _mm_sub_epi32(lstep1[57], lstep2[55]);
+          lstep3[56] = _mm_add_epi32(lstep2[54], lstep1[56]);
+          lstep3[57] = _mm_add_epi32(lstep2[55], lstep1[57]);
+          lstep3[58] = _mm_add_epi32(lstep2[52], lstep1[58]);
+          lstep3[59] = _mm_add_epi32(lstep2[53], lstep1[59]);
+          lstep3[60] = _mm_add_epi32(lstep2[50], lstep1[60]);
+          lstep3[61] = _mm_add_epi32(lstep2[51], lstep1[61]);
+          lstep3[62] = _mm_add_epi32(lstep2[48], lstep1[62]);
+          lstep3[63] = _mm_add_epi32(lstep2[49], lstep1[63]);
+        }
+
+        // stage 4
+        {
+          // expand to 32-bit precision prior to the addition operations
+          lstep2[16] = _mm_unpacklo_epi16(step2[8], kZero);
+          lstep2[17] = _mm_unpackhi_epi16(step2[8], kZero);
+          lstep2[18] = _mm_unpacklo_epi16(step2[9], kZero);
+          lstep2[19] = _mm_unpackhi_epi16(step2[9], kZero);
+          lstep2[28] = _mm_unpacklo_epi16(step2[14], kZero);
+          lstep2[29] = _mm_unpackhi_epi16(step2[14], kZero);
+          lstep2[30] = _mm_unpacklo_epi16(step2[15], kZero);
+          lstep2[31] = _mm_unpackhi_epi16(step2[15], kZero);
+          lstep2[16] = _mm_madd_epi16(lstep2[16], kOne);
+          lstep2[17] = _mm_madd_epi16(lstep2[17], kOne);
+          lstep2[18] = _mm_madd_epi16(lstep2[18], kOne);
+          lstep2[19] = _mm_madd_epi16(lstep2[19], kOne);
+          lstep2[28] = _mm_madd_epi16(lstep2[28], kOne);
+          lstep2[29] = _mm_madd_epi16(lstep2[29], kOne);
+          lstep2[30] = _mm_madd_epi16(lstep2[30], kOne);
+          lstep2[31] = _mm_madd_epi16(lstep2[31], kOne);
+
+          lstep1[0] = _mm_add_epi32(lstep3[6], lstep3[0]);
+          lstep1[1] = _mm_add_epi32(lstep3[7], lstep3[1]);
+          lstep1[2] = _mm_add_epi32(lstep3[4], lstep3[2]);
+          lstep1[3] = _mm_add_epi32(lstep3[5], lstep3[3]);
+          lstep1[4] = _mm_sub_epi32(lstep3[2], lstep3[4]);
+          lstep1[5] = _mm_sub_epi32(lstep3[3], lstep3[5]);
+          lstep1[6] = _mm_sub_epi32(lstep3[0], lstep3[6]);
+          lstep1[7] = _mm_sub_epi32(lstep3[1], lstep3[7]);
+          lstep1[16] = _mm_add_epi32(lstep3[22], lstep2[16]);
+          lstep1[17] = _mm_add_epi32(lstep3[23], lstep2[17]);
+          lstep1[18] = _mm_add_epi32(lstep3[20], lstep2[18]);
+          lstep1[19] = _mm_add_epi32(lstep3[21], lstep2[19]);
+          lstep1[20] = _mm_sub_epi32(lstep2[18], lstep3[20]);
+          lstep1[21] = _mm_sub_epi32(lstep2[19], lstep3[21]);
+          lstep1[22] = _mm_sub_epi32(lstep2[16], lstep3[22]);
+          lstep1[23] = _mm_sub_epi32(lstep2[17], lstep3[23]);
+          lstep1[24] = _mm_sub_epi32(lstep2[30], lstep3[24]);
+          lstep1[25] = _mm_sub_epi32(lstep2[31], lstep3[25]);
+          lstep1[26] = _mm_sub_epi32(lstep2[28], lstep3[26]);
+          lstep1[27] = _mm_sub_epi32(lstep2[29], lstep3[27]);
+          lstep1[28] = _mm_add_epi32(lstep3[26], lstep2[28]);
+          lstep1[29] = _mm_add_epi32(lstep3[27], lstep2[29]);
+          lstep1[30] = _mm_add_epi32(lstep3[24], lstep2[30]);
+          lstep1[31] = _mm_add_epi32(lstep3[25], lstep2[31]);
+        }
+        {
+          // 32-bit butterfly: rotate lstep3[10..13] by cospi_16_64 to form
+          // lstep1[10..13].
+          const __m128i k32_p16_p16 = pair_set_epi32(cospi_16_64, cospi_16_64);
+          const __m128i k32_p16_m16 = pair_set_epi32(cospi_16_64, -cospi_16_64);
+
+          u[0] = _mm_unpacklo_epi32(lstep3[12], lstep3[10]);
+          u[1] = _mm_unpackhi_epi32(lstep3[12], lstep3[10]);
+          u[2] = _mm_unpacklo_epi32(lstep3[13], lstep3[11]);
+          u[3] = _mm_unpackhi_epi32(lstep3[13], lstep3[11]);
+
+          // TODO(jingning): manually inline k_madd_epi32_ to further hide
+          // instruction latency.
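+          // k_madd_epi32() is effectively the 32-bit counterpart of
+          // _mm_madd_epi16(): it multiplies the two 32-bit pairs in each
+          // 64-bit lane and sums the products; k_packs_epi64() later narrows
+          // the 64-bit results back to 32 bits.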
+          v[0] = k_madd_epi32(u[0], k32_p16_m16);
+          v[1] = k_madd_epi32(u[1], k32_p16_m16);
+          v[2] = k_madd_epi32(u[2], k32_p16_m16);
+          v[3] = k_madd_epi32(u[3], k32_p16_m16);
+          v[4] = k_madd_epi32(u[0], k32_p16_p16);
+          v[5] = k_madd_epi32(u[1], k32_p16_p16);
+          v[6] = k_madd_epi32(u[2], k32_p16_p16);
+          v[7] = k_madd_epi32(u[3], k32_p16_p16);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = k_check_epi32_overflow_8(&v[0], &v[1], &v[2], &v[3], &v[4],
+                                              &v[5], &v[6], &v[7], &kZero);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+          u[0] = k_packs_epi64(v[0], v[1]);
+          u[1] = k_packs_epi64(v[2], v[3]);
+          u[2] = k_packs_epi64(v[4], v[5]);
+          u[3] = k_packs_epi64(v[6], v[7]);
+
+          v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+          v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+          v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+          v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+
+          lstep1[10] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+          lstep1[11] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+          lstep1[12] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+          lstep1[13] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+        }
+        {
+          const __m128i k32_m08_p24 = pair_set_epi32(-cospi_8_64, cospi_24_64);
+          const __m128i k32_m24_m08 = pair_set_epi32(-cospi_24_64, -cospi_8_64);
+          const __m128i k32_p24_p08 = pair_set_epi32(cospi_24_64, cospi_8_64);
+
+          u[0] = _mm_unpacklo_epi32(lstep3[36], lstep3[58]);
+          u[1] = _mm_unpackhi_epi32(lstep3[36], lstep3[58]);
+          u[2] = _mm_unpacklo_epi32(lstep3[37], lstep3[59]);
+          u[3] = _mm_unpackhi_epi32(lstep3[37], lstep3[59]);
+          u[4] = _mm_unpacklo_epi32(lstep3[38], lstep3[56]);
+          u[5] = _mm_unpackhi_epi32(lstep3[38], lstep3[56]);
+          u[6] = _mm_unpacklo_epi32(lstep3[39], lstep3[57]);
+          u[7] = _mm_unpackhi_epi32(lstep3[39], lstep3[57]);
+          u[8] = _mm_unpacklo_epi32(lstep3[40], lstep3[54]);
+          u[9] = _mm_unpackhi_epi32(lstep3[40], lstep3[54]);
+          u[10] = _mm_unpacklo_epi32(lstep3[41], lstep3[55]);
+          u[11] = _mm_unpackhi_epi32(lstep3[41], lstep3[55]);
+          u[12] = _mm_unpacklo_epi32(lstep3[42], lstep3[52]);
+          u[13] = _mm_unpackhi_epi32(lstep3[42], lstep3[52]);
+          u[14] = _mm_unpacklo_epi32(lstep3[43], lstep3[53]);
+          u[15] = _mm_unpackhi_epi32(lstep3[43], lstep3[53]);
+
+          v[0] = k_madd_epi32(u[0], k32_m08_p24);
+          v[1] = k_madd_epi32(u[1], k32_m08_p24);
+          v[2] = k_madd_epi32(u[2], k32_m08_p24);
+          v[3] = k_madd_epi32(u[3], k32_m08_p24);
+          v[4] = k_madd_epi32(u[4], k32_m08_p24);
+          v[5] = k_madd_epi32(u[5], k32_m08_p24);
+          v[6] = k_madd_epi32(u[6], k32_m08_p24);
+          v[7] = k_madd_epi32(u[7], k32_m08_p24);
+          v[8] = k_madd_epi32(u[8], k32_m24_m08);
+          v[9] = k_madd_epi32(u[9], k32_m24_m08);
+          v[10] = k_madd_epi32(u[10], k32_m24_m08);
+          v[11] = k_madd_epi32(u[11], k32_m24_m08);
+          v[12] = k_madd_epi32(u[12], k32_m24_m08);
+          v[13] = k_madd_epi32(u[13], k32_m24_m08);
+          v[14] = k_madd_epi32(u[14], k32_m24_m08);
+          v[15] = k_madd_epi32(u[15], k32_m24_m08);
+          v[16] = k_madd_epi32(u[12], k32_m08_p24);
+          v[17] = k_madd_epi32(u[13], k32_m08_p24);
+          v[18] = k_madd_epi32(u[14], k32_m08_p24);
+          v[19] = k_madd_epi32(u[15], k32_m08_p24);
+          v[20] = k_madd_epi32(u[8], k32_m08_p24);
+          v[21] = k_madd_epi32(u[9], k32_m08_p24);
+          v[22] = k_madd_epi32(u[10], k32_m08_p24);
+          v[23] = k_madd_epi32(u[11], k32_m08_p24);
+          v[24] = k_madd_epi32(u[4], k32_p24_p08);
+          v[25] = k_madd_epi32(u[5], k32_p24_p08);
+          v[26] = k_madd_epi32(u[6], k32_p24_p08);
+          v[27] = k_madd_epi32(u[7], k32_p24_p08);
+          v[28] = k_madd_epi32(u[0], k32_p24_p08);
+          v[29] = k_madd_epi32(u[1], k32_p24_p08);
+          v[30] = k_madd_epi32(u[2], k32_p24_p08);
+          v[31] = k_madd_epi32(u[3], k32_p24_p08);
+
+#if DCT_HIGH_BIT_DEPTH
+          overflow = k_check_epi32_overflow_32(
+              &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+              &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
+              &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
+              &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+          u[0] = k_packs_epi64(v[0], v[1]);
+          u[1] = k_packs_epi64(v[2], v[3]);
+          u[2] = k_packs_epi64(v[4], v[5]);
+          u[3] = k_packs_epi64(v[6], v[7]);
+          u[4] = k_packs_epi64(v[8], v[9]);
+          u[5] = k_packs_epi64(v[10], v[11]);
+          u[6] = k_packs_epi64(v[12], v[13]);
+          u[7] = k_packs_epi64(v[14], v[15]);
+          u[8] = k_packs_epi64(v[16], v[17]);
+          u[9] = k_packs_epi64(v[18], v[19]);
+          u[10] = k_packs_epi64(v[20], v[21]);
+          u[11] = k_packs_epi64(v[22], v[23]);
+          u[12] = k_packs_epi64(v[24], v[25]);
+          u[13] = k_packs_epi64(v[26], v[27]);
+          u[14] = k_packs_epi64(v[28], v[29]);
+          u[15] = k_packs_epi64(v[30], v[31]);
+
+          v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+          v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+          v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+          v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+          v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+          v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+          v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+          v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+          v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+          v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+          v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+          v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+          v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+          v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+          v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+          v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+          lstep1[36] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+          lstep1[37] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+          lstep1[38] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+          lstep1[39] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+          lstep1[40] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+          lstep1[41] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+          lstep1[42] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+          lstep1[43] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+          lstep1[52] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+          lstep1[53] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+          lstep1[54] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+          lstep1[55] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+          lstep1[56] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+          lstep1[57] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+          lstep1[58] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+          lstep1[59] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+        }
+        // stage 5
+        {
+          lstep2[8] = _mm_add_epi32(lstep1[10], lstep3[8]);
+          lstep2[9] = _mm_add_epi32(lstep1[11], lstep3[9]);
+          lstep2[10] = _mm_sub_epi32(lstep3[8], lstep1[10]);
+          lstep2[11] = _mm_sub_epi32(lstep3[9], lstep1[11]);
+          lstep2[12] = _mm_sub_epi32(lstep3[14], lstep1[12]);
+          lstep2[13] = _mm_sub_epi32(lstep3[15], lstep1[13]);
+          lstep2[14] = _mm_add_epi32(lstep1[12], lstep3[14]);
+          lstep2[15] = _mm_add_epi32(lstep1[13], lstep3[15]);
+        }
+        {
+          const __m128i k32_p16_p16 = pair_set_epi32(cospi_16_64, cospi_16_64);
+          const __m128i k32_p16_m16 = pair_set_epi32(cospi_16_64, -cospi_16_64);
+          const __m128i k32_p24_p08 = pair_set_epi32(cospi_24_64, cospi_8_64);
+          const __m128i k32_m08_p24 = pair_set_epi32(-cospi_8_64, cospi_24_64);
+
+          u[0] = _mm_unpacklo_epi32(lstep1[0], lstep1[2]);
+          u[1] = _mm_unpackhi_epi32(lstep1[0], lstep1[2]);
+          u[2] = _mm_unpacklo_epi32(lstep1[1], lstep1[3]);
+          u[3] = _mm_unpackhi_epi32(lstep1[1], lstep1[3]);
+          u[4] = _mm_unpacklo_epi32(lstep1[4], lstep1[6]);
+          u[5] = _mm_unpackhi_epi32(lstep1[4], lstep1[6]);
+          u[6] = _mm_unpacklo_epi32(lstep1[5], lstep1[7]);
+          u[7] = _mm_unpackhi_epi32(lstep1[5], lstep1[7]);
+
+          // TODO(jingning): manually inline k_madd_epi32_ to further hide
+          // instruction latency.
+          v[0] = k_madd_epi32(u[0], k32_p16_p16);
+          v[1] = k_madd_epi32(u[1], k32_p16_p16);
+          v[2] = k_madd_epi32(u[2], k32_p16_p16);
+          v[3] = k_madd_epi32(u[3], k32_p16_p16);
+          v[4] = k_madd_epi32(u[0], k32_p16_m16);
+          v[5] = k_madd_epi32(u[1], k32_p16_m16);
+          v[6] = k_madd_epi32(u[2], k32_p16_m16);
+          v[7] = k_madd_epi32(u[3], k32_p16_m16);
+          v[8] = k_madd_epi32(u[4], k32_p24_p08);
+          v[9] = k_madd_epi32(u[5], k32_p24_p08);
+          v[10] = k_madd_epi32(u[6], k32_p24_p08);
+          v[11] = k_madd_epi32(u[7], k32_p24_p08);
+          v[12] = k_madd_epi32(u[4], k32_m08_p24);
+          v[13] = k_madd_epi32(u[5], k32_m08_p24);
+          v[14] = k_madd_epi32(u[6], k32_m08_p24);
+          v[15] = k_madd_epi32(u[7], k32_m08_p24);
+
+#if DCT_HIGH_BIT_DEPTH
+          overflow = k_check_epi32_overflow_16(
+              &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+              &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+          u[0] = k_packs_epi64(v[0], v[1]);
+          u[1] = k_packs_epi64(v[2], v[3]);
+          u[2] = k_packs_epi64(v[4], v[5]);
+          u[3] = k_packs_epi64(v[6], v[7]);
+          u[4] = k_packs_epi64(v[8], v[9]);
+          u[5] = k_packs_epi64(v[10], v[11]);
+          u[6] = k_packs_epi64(v[12], v[13]);
+          u[7] = k_packs_epi64(v[14], v[15]);
+
+          v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+          v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+          v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+          v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+          v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+          v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+          v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+          v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+
+          u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+          u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+          u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+          u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+          u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+          u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+          u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+          u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+
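+          // Rounding as in the C reference: (x + 1 + (x < 0)) >> 2.
+          // cmplt sets all-ones (-1) for negative lanes, so the subtraction
+          // adds the extra 1 for negative inputs before the bias and shift.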
+          sign[0] = _mm_cmplt_epi32(u[0], kZero);
+          sign[1] = _mm_cmplt_epi32(u[1], kZero);
+          sign[2] = _mm_cmplt_epi32(u[2], kZero);
+          sign[3] = _mm_cmplt_epi32(u[3], kZero);
+          sign[4] = _mm_cmplt_epi32(u[4], kZero);
+          sign[5] = _mm_cmplt_epi32(u[5], kZero);
+          sign[6] = _mm_cmplt_epi32(u[6], kZero);
+          sign[7] = _mm_cmplt_epi32(u[7], kZero);
+
+          u[0] = _mm_sub_epi32(u[0], sign[0]);
+          u[1] = _mm_sub_epi32(u[1], sign[1]);
+          u[2] = _mm_sub_epi32(u[2], sign[2]);
+          u[3] = _mm_sub_epi32(u[3], sign[3]);
+          u[4] = _mm_sub_epi32(u[4], sign[4]);
+          u[5] = _mm_sub_epi32(u[5], sign[5]);
+          u[6] = _mm_sub_epi32(u[6], sign[6]);
+          u[7] = _mm_sub_epi32(u[7], sign[7]);
+
+          u[0] = _mm_add_epi32(u[0], K32One);
+          u[1] = _mm_add_epi32(u[1], K32One);
+          u[2] = _mm_add_epi32(u[2], K32One);
+          u[3] = _mm_add_epi32(u[3], K32One);
+          u[4] = _mm_add_epi32(u[4], K32One);
+          u[5] = _mm_add_epi32(u[5], K32One);
+          u[6] = _mm_add_epi32(u[6], K32One);
+          u[7] = _mm_add_epi32(u[7], K32One);
+
+          u[0] = _mm_srai_epi32(u[0], 2);
+          u[1] = _mm_srai_epi32(u[1], 2);
+          u[2] = _mm_srai_epi32(u[2], 2);
+          u[3] = _mm_srai_epi32(u[3], 2);
+          u[4] = _mm_srai_epi32(u[4], 2);
+          u[5] = _mm_srai_epi32(u[5], 2);
+          u[6] = _mm_srai_epi32(u[6], 2);
+          u[7] = _mm_srai_epi32(u[7], 2);
+
+          // Combine
+          out[0] = _mm_packs_epi32(u[0], u[1]);
+          out[16] = _mm_packs_epi32(u[2], u[3]);
+          out[8] = _mm_packs_epi32(u[4], u[5]);
+          out[24] = _mm_packs_epi32(u[6], u[7]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x4(&out[0], &out[16], &out[8], &out[24]);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          const __m128i k32_m08_p24 = pair_set_epi32(-cospi_8_64, cospi_24_64);
+          const __m128i k32_m24_m08 = pair_set_epi32(-cospi_24_64, -cospi_8_64);
+          const __m128i k32_p24_p08 = pair_set_epi32(cospi_24_64, cospi_8_64);
+
+          u[0] = _mm_unpacklo_epi32(lstep1[18], lstep1[28]);
+          u[1] = _mm_unpackhi_epi32(lstep1[18], lstep1[28]);
+          u[2] = _mm_unpacklo_epi32(lstep1[19], lstep1[29]);
+          u[3] = _mm_unpackhi_epi32(lstep1[19], lstep1[29]);
+          u[4] = _mm_unpacklo_epi32(lstep1[20], lstep1[26]);
+          u[5] = _mm_unpackhi_epi32(lstep1[20], lstep1[26]);
+          u[6] = _mm_unpacklo_epi32(lstep1[21], lstep1[27]);
+          u[7] = _mm_unpackhi_epi32(lstep1[21], lstep1[27]);
+
+          v[0] = k_madd_epi32(u[0], k32_m08_p24);
+          v[1] = k_madd_epi32(u[1], k32_m08_p24);
+          v[2] = k_madd_epi32(u[2], k32_m08_p24);
+          v[3] = k_madd_epi32(u[3], k32_m08_p24);
+          v[4] = k_madd_epi32(u[4], k32_m24_m08);
+          v[5] = k_madd_epi32(u[5], k32_m24_m08);
+          v[6] = k_madd_epi32(u[6], k32_m24_m08);
+          v[7] = k_madd_epi32(u[7], k32_m24_m08);
+          v[8] = k_madd_epi32(u[4], k32_m08_p24);
+          v[9] = k_madd_epi32(u[5], k32_m08_p24);
+          v[10] = k_madd_epi32(u[6], k32_m08_p24);
+          v[11] = k_madd_epi32(u[7], k32_m08_p24);
+          v[12] = k_madd_epi32(u[0], k32_p24_p08);
+          v[13] = k_madd_epi32(u[1], k32_p24_p08);
+          v[14] = k_madd_epi32(u[2], k32_p24_p08);
+          v[15] = k_madd_epi32(u[3], k32_p24_p08);
+
+#if DCT_HIGH_BIT_DEPTH
+          overflow = k_check_epi32_overflow_16(
+              &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+              &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+          u[0] = k_packs_epi64(v[0], v[1]);
+          u[1] = k_packs_epi64(v[2], v[3]);
+          u[2] = k_packs_epi64(v[4], v[5]);
+          u[3] = k_packs_epi64(v[6], v[7]);
+          u[4] = k_packs_epi64(v[8], v[9]);
+          u[5] = k_packs_epi64(v[10], v[11]);
+          u[6] = k_packs_epi64(v[12], v[13]);
+          u[7] = k_packs_epi64(v[14], v[15]);
+
+          u[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+          u[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+          u[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+          u[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+          u[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+          u[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+          u[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+          u[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+
+          lstep2[18] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+          lstep2[19] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+          lstep2[20] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+          lstep2[21] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+          lstep2[26] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+          lstep2[27] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+          lstep2[28] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+          lstep2[29] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+        }
+        {
+          lstep2[32] = _mm_add_epi32(lstep1[38], lstep3[32]);
+          lstep2[33] = _mm_add_epi32(lstep1[39], lstep3[33]);
+          lstep2[34] = _mm_add_epi32(lstep1[36], lstep3[34]);
+          lstep2[35] = _mm_add_epi32(lstep1[37], lstep3[35]);
+          lstep2[36] = _mm_sub_epi32(lstep3[34], lstep1[36]);
+          lstep2[37] = _mm_sub_epi32(lstep3[35], lstep1[37]);
+          lstep2[38] = _mm_sub_epi32(lstep3[32], lstep1[38]);
+          lstep2[39] = _mm_sub_epi32(lstep3[33], lstep1[39]);
+          lstep2[40] = _mm_sub_epi32(lstep3[46], lstep1[40]);
+          lstep2[41] = _mm_sub_epi32(lstep3[47], lstep1[41]);
+          lstep2[42] = _mm_sub_epi32(lstep3[44], lstep1[42]);
+          lstep2[43] = _mm_sub_epi32(lstep3[45], lstep1[43]);
+          lstep2[44] = _mm_add_epi32(lstep1[42], lstep3[44]);
+          lstep2[45] = _mm_add_epi32(lstep1[43], lstep3[45]);
+          lstep2[46] = _mm_add_epi32(lstep1[40], lstep3[46]);
+          lstep2[47] = _mm_add_epi32(lstep1[41], lstep3[47]);
+          lstep2[48] = _mm_add_epi32(lstep1[54], lstep3[48]);
+          lstep2[49] = _mm_add_epi32(lstep1[55], lstep3[49]);
+          lstep2[50] = _mm_add_epi32(lstep1[52], lstep3[50]);
+          lstep2[51] = _mm_add_epi32(lstep1[53], lstep3[51]);
+          lstep2[52] = _mm_sub_epi32(lstep3[50], lstep1[52]);
+          lstep2[53] = _mm_sub_epi32(lstep3[51], lstep1[53]);
+          lstep2[54] = _mm_sub_epi32(lstep3[48], lstep1[54]);
+          lstep2[55] = _mm_sub_epi32(lstep3[49], lstep1[55]);
+          lstep2[56] = _mm_sub_epi32(lstep3[62], lstep1[56]);
+          lstep2[57] = _mm_sub_epi32(lstep3[63], lstep1[57]);
+          lstep2[58] = _mm_sub_epi32(lstep3[60], lstep1[58]);
+          lstep2[59] = _mm_sub_epi32(lstep3[61], lstep1[59]);
+          lstep2[60] = _mm_add_epi32(lstep1[58], lstep3[60]);
+          lstep2[61] = _mm_add_epi32(lstep1[59], lstep3[61]);
+          lstep2[62] = _mm_add_epi32(lstep1[56], lstep3[62]);
+          lstep2[63] = _mm_add_epi32(lstep1[57], lstep3[63]);
+        }
+        // stage 6
+        {
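+          // 32-bit versions of the 4/12/20/28-step rotation constants that
+          // mirror the 16-bit path's k__cospi_* vectors.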
+          const __m128i k32_p28_p04 = pair_set_epi32(cospi_28_64, cospi_4_64);
+          const __m128i k32_p12_p20 = pair_set_epi32(cospi_12_64, cospi_20_64);
+          const __m128i k32_m20_p12 = pair_set_epi32(-cospi_20_64, cospi_12_64);
+          const __m128i k32_m04_p28 = pair_set_epi32(-cospi_4_64, cospi_28_64);
+
+          u[0] = _mm_unpacklo_epi32(lstep2[8], lstep2[14]);
+          u[1] = _mm_unpackhi_epi32(lstep2[8], lstep2[14]);
+          u[2] = _mm_unpacklo_epi32(lstep2[9], lstep2[15]);
+          u[3] = _mm_unpackhi_epi32(lstep2[9], lstep2[15]);
+          u[4] = _mm_unpacklo_epi32(lstep2[10], lstep2[12]);
+          u[5] = _mm_unpackhi_epi32(lstep2[10], lstep2[12]);
+          u[6] = _mm_unpacklo_epi32(lstep2[11], lstep2[13]);
+          u[7] = _mm_unpackhi_epi32(lstep2[11], lstep2[13]);
+          u[8] = _mm_unpacklo_epi32(lstep2[10], lstep2[12]);
+          u[9] = _mm_unpackhi_epi32(lstep2[10], lstep2[12]);
+          u[10] = _mm_unpacklo_epi32(lstep2[11], lstep2[13]);
+          u[11] = _mm_unpackhi_epi32(lstep2[11], lstep2[13]);
+          u[12] = _mm_unpacklo_epi32(lstep2[8], lstep2[14]);
+          u[13] = _mm_unpackhi_epi32(lstep2[8], lstep2[14]);
+          u[14] = _mm_unpacklo_epi32(lstep2[9], lstep2[15]);
+          u[15] = _mm_unpackhi_epi32(lstep2[9], lstep2[15]);
+
+          v[0] = k_madd_epi32(u[0], k32_p28_p04);
+          v[1] = k_madd_epi32(u[1], k32_p28_p04);
+          v[2] = k_madd_epi32(u[2], k32_p28_p04);
+          v[3] = k_madd_epi32(u[3], k32_p28_p04);
+          v[4] = k_madd_epi32(u[4], k32_p12_p20);
+          v[5] = k_madd_epi32(u[5], k32_p12_p20);
+          v[6] = k_madd_epi32(u[6], k32_p12_p20);
+          v[7] = k_madd_epi32(u[7], k32_p12_p20);
+          v[8] = k_madd_epi32(u[8], k32_m20_p12);
+          v[9] = k_madd_epi32(u[9], k32_m20_p12);
+          v[10] = k_madd_epi32(u[10], k32_m20_p12);
+          v[11] = k_madd_epi32(u[11], k32_m20_p12);
+          v[12] = k_madd_epi32(u[12], k32_m04_p28);
+          v[13] = k_madd_epi32(u[13], k32_m04_p28);
+          v[14] = k_madd_epi32(u[14], k32_m04_p28);
+          v[15] = k_madd_epi32(u[15], k32_m04_p28);
+
+#if DCT_HIGH_BIT_DEPTH
+          overflow = k_check_epi32_overflow_16(
+              &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+              &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+          u[0] = k_packs_epi64(v[0], v[1]);
+          u[1] = k_packs_epi64(v[2], v[3]);
+          u[2] = k_packs_epi64(v[4], v[5]);
+          u[3] = k_packs_epi64(v[6], v[7]);
+          u[4] = k_packs_epi64(v[8], v[9]);
+          u[5] = k_packs_epi64(v[10], v[11]);
+          u[6] = k_packs_epi64(v[12], v[13]);
+          u[7] = k_packs_epi64(v[14], v[15]);
+
+          v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+          v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+          v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+          v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+          v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+          v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+          v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+          v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+
+          u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+          u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+          u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+          u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+          u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+          u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+          u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+          u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+
+          sign[0] = _mm_cmplt_epi32(u[0], kZero);
+          sign[1] = _mm_cmplt_epi32(u[1], kZero);
+          sign[2] = _mm_cmplt_epi32(u[2], kZero);
+          sign[3] = _mm_cmplt_epi32(u[3], kZero);
+          sign[4] = _mm_cmplt_epi32(u[4], kZero);
+          sign[5] = _mm_cmplt_epi32(u[5], kZero);
+          sign[6] = _mm_cmplt_epi32(u[6], kZero);
+          sign[7] = _mm_cmplt_epi32(u[7], kZero);
+
+          u[0] = _mm_sub_epi32(u[0], sign[0]);
+          u[1] = _mm_sub_epi32(u[1], sign[1]);
+          u[2] = _mm_sub_epi32(u[2], sign[2]);
+          u[3] = _mm_sub_epi32(u[3], sign[3]);
+          u[4] = _mm_sub_epi32(u[4], sign[4]);
+          u[5] = _mm_sub_epi32(u[5], sign[5]);
+          u[6] = _mm_sub_epi32(u[6], sign[6]);
+          u[7] = _mm_sub_epi32(u[7], sign[7]);
+
+          u[0] = _mm_add_epi32(u[0], K32One);
+          u[1] = _mm_add_epi32(u[1], K32One);
+          u[2] = _mm_add_epi32(u[2], K32One);
+          u[3] = _mm_add_epi32(u[3], K32One);
+          u[4] = _mm_add_epi32(u[4], K32One);
+          u[5] = _mm_add_epi32(u[5], K32One);
+          u[6] = _mm_add_epi32(u[6], K32One);
+          u[7] = _mm_add_epi32(u[7], K32One);
+
+          u[0] = _mm_srai_epi32(u[0], 2);
+          u[1] = _mm_srai_epi32(u[1], 2);
+          u[2] = _mm_srai_epi32(u[2], 2);
+          u[3] = _mm_srai_epi32(u[3], 2);
+          u[4] = _mm_srai_epi32(u[4], 2);
+          u[5] = _mm_srai_epi32(u[5], 2);
+          u[6] = _mm_srai_epi32(u[6], 2);
+          u[7] = _mm_srai_epi32(u[7], 2);
+
+          out[4] = _mm_packs_epi32(u[0], u[1]);
+          out[20] = _mm_packs_epi32(u[2], u[3]);
+          out[12] = _mm_packs_epi32(u[4], u[5]);
+          out[28] = _mm_packs_epi32(u[6], u[7]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x4(&out[4], &out[20], &out[12], &out[28]);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          lstep3[16] = _mm_add_epi32(lstep2[18], lstep1[16]);
+          lstep3[17] = _mm_add_epi32(lstep2[19], lstep1[17]);
+          lstep3[18] = _mm_sub_epi32(lstep1[16], lstep2[18]);
+          lstep3[19] = _mm_sub_epi32(lstep1[17], lstep2[19]);
+          lstep3[20] = _mm_sub_epi32(lstep1[22], lstep2[20]);
+          lstep3[21] = _mm_sub_epi32(lstep1[23], lstep2[21]);
+          lstep3[22] = _mm_add_epi32(lstep2[20], lstep1[22]);
+          lstep3[23] = _mm_add_epi32(lstep2[21], lstep1[23]);
+          lstep3[24] = _mm_add_epi32(lstep2[26], lstep1[24]);
+          lstep3[25] = _mm_add_epi32(lstep2[27], lstep1[25]);
+          lstep3[26] = _mm_sub_epi32(lstep1[24], lstep2[26]);
+          lstep3[27] = _mm_sub_epi32(lstep1[25], lstep2[27]);
+          lstep3[28] = _mm_sub_epi32(lstep1[30], lstep2[28]);
+          lstep3[29] = _mm_sub_epi32(lstep1[31], lstep2[29]);
+          lstep3[30] = _mm_add_epi32(lstep2[28], lstep1[30]);
+          lstep3[31] = _mm_add_epi32(lstep2[29], lstep1[31]);
+        }
+        {
+          const __m128i k32_m04_p28 = pair_set_epi32(-cospi_4_64, cospi_28_64);
+          const __m128i k32_m28_m04 = pair_set_epi32(-cospi_28_64, -cospi_4_64);
+          const __m128i k32_m20_p12 = pair_set_epi32(-cospi_20_64, cospi_12_64);
+          const __m128i k32_m12_m20 =
+              pair_set_epi32(-cospi_12_64, -cospi_20_64);
+          const __m128i k32_p12_p20 = pair_set_epi32(cospi_12_64, cospi_20_64);
+          const __m128i k32_p28_p04 = pair_set_epi32(cospi_28_64, cospi_4_64);
+
+          u[0] = _mm_unpacklo_epi32(lstep2[34], lstep2[60]);
+          u[1] = _mm_unpackhi_epi32(lstep2[34], lstep2[60]);
+          u[2] = _mm_unpacklo_epi32(lstep2[35], lstep2[61]);
+          u[3] = _mm_unpackhi_epi32(lstep2[35], lstep2[61]);
+          u[4] = _mm_unpacklo_epi32(lstep2[36], lstep2[58]);
+          u[5] = _mm_unpackhi_epi32(lstep2[36], lstep2[58]);
+          u[6] = _mm_unpacklo_epi32(lstep2[37], lstep2[59]);
+          u[7] = _mm_unpackhi_epi32(lstep2[37], lstep2[59]);
+          u[8] = _mm_unpacklo_epi32(lstep2[42], lstep2[52]);
+          u[9] = _mm_unpackhi_epi32(lstep2[42], lstep2[52]);
+          u[10] = _mm_unpacklo_epi32(lstep2[43], lstep2[53]);
+          u[11] = _mm_unpackhi_epi32(lstep2[43], lstep2[53]);
+          u[12] = _mm_unpacklo_epi32(lstep2[44], lstep2[50]);
+          u[13] = _mm_unpackhi_epi32(lstep2[44], lstep2[50]);
+          u[14] = _mm_unpacklo_epi32(lstep2[45], lstep2[51]);
+          u[15] = _mm_unpackhi_epi32(lstep2[45], lstep2[51]);
+
+          v[0] = k_madd_epi32(u[0], k32_m04_p28);
+          v[1] = k_madd_epi32(u[1], k32_m04_p28);
+          v[2] = k_madd_epi32(u[2], k32_m04_p28);
+          v[3] = k_madd_epi32(u[3], k32_m04_p28);
+          v[4] = k_madd_epi32(u[4], k32_m28_m04);
+          v[5] = k_madd_epi32(u[5], k32_m28_m04);
+          v[6] = k_madd_epi32(u[6], k32_m28_m04);
+          v[7] = k_madd_epi32(u[7], k32_m28_m04);
+          v[8] = k_madd_epi32(u[8], k32_m20_p12);
+          v[9] = k_madd_epi32(u[9], k32_m20_p12);
+          v[10] = k_madd_epi32(u[10], k32_m20_p12);
+          v[11] = k_madd_epi32(u[11], k32_m20_p12);
+          v[12] = k_madd_epi32(u[12], k32_m12_m20);
+          v[13] = k_madd_epi32(u[13], k32_m12_m20);
+          v[14] = k_madd_epi32(u[14], k32_m12_m20);
+          v[15] = k_madd_epi32(u[15], k32_m12_m20);
+          v[16] = k_madd_epi32(u[12], k32_m20_p12);
+          v[17] = k_madd_epi32(u[13], k32_m20_p12);
+          v[18] = k_madd_epi32(u[14], k32_m20_p12);
+          v[19] = k_madd_epi32(u[15], k32_m20_p12);
+          v[20] = k_madd_epi32(u[8], k32_p12_p20);
+          v[21] = k_madd_epi32(u[9], k32_p12_p20);
+          v[22] = k_madd_epi32(u[10], k32_p12_p20);
+          v[23] = k_madd_epi32(u[11], k32_p12_p20);
+          v[24] = k_madd_epi32(u[4], k32_m04_p28);
+          v[25] = k_madd_epi32(u[5], k32_m04_p28);
+          v[26] = k_madd_epi32(u[6], k32_m04_p28);
+          v[27] = k_madd_epi32(u[7], k32_m04_p28);
+          v[28] = k_madd_epi32(u[0], k32_p28_p04);
+          v[29] = k_madd_epi32(u[1], k32_p28_p04);
+          v[30] = k_madd_epi32(u[2], k32_p28_p04);
+          v[31] = k_madd_epi32(u[3], k32_p28_p04);
+
+#if DCT_HIGH_BIT_DEPTH
+          overflow = k_check_epi32_overflow_32(
+              &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+              &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
+              &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
+              &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+          u[0] = k_packs_epi64(v[0], v[1]);
+          u[1] = k_packs_epi64(v[2], v[3]);
+          u[2] = k_packs_epi64(v[4], v[5]);
+          u[3] = k_packs_epi64(v[6], v[7]);
+          u[4] = k_packs_epi64(v[8], v[9]);
+          u[5] = k_packs_epi64(v[10], v[11]);
+          u[6] = k_packs_epi64(v[12], v[13]);
+          u[7] = k_packs_epi64(v[14], v[15]);
+          u[8] = k_packs_epi64(v[16], v[17]);
+          u[9] = k_packs_epi64(v[18], v[19]);
+          u[10] = k_packs_epi64(v[20], v[21]);
+          u[11] = k_packs_epi64(v[22], v[23]);
+          u[12] = k_packs_epi64(v[24], v[25]);
+          u[13] = k_packs_epi64(v[26], v[27]);
+          u[14] = k_packs_epi64(v[28], v[29]);
+          u[15] = k_packs_epi64(v[30], v[31]);
+
+          v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+          v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+          v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+          v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+          v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+          v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+          v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+          v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+          v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+          v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+          v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+          v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+          v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+          v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+          v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+          v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+          lstep3[34] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+          lstep3[35] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+          lstep3[36] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+          lstep3[37] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+          lstep3[42] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+          lstep3[43] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+          lstep3[44] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+          lstep3[45] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+          lstep3[50] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+          lstep3[51] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+          lstep3[52] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+          lstep3[53] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+          lstep3[58] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+          lstep3[59] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+          lstep3[60] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+          lstep3[61] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+        }
+        // stage 7
+        {
+          const __m128i k32_p30_p02 = pair_set_epi32(cospi_30_64, cospi_2_64);
+          const __m128i k32_p14_p18 = pair_set_epi32(cospi_14_64, cospi_18_64);
+          const __m128i k32_p22_p10 = pair_set_epi32(cospi_22_64, cospi_10_64);
+          const __m128i k32_p06_p26 = pair_set_epi32(cospi_6_64, cospi_26_64);
+          const __m128i k32_m26_p06 = pair_set_epi32(-cospi_26_64, cospi_6_64);
+          const __m128i k32_m10_p22 = pair_set_epi32(-cospi_10_64, cospi_22_64);
+          const __m128i k32_m18_p14 = pair_set_epi32(-cospi_18_64, cospi_14_64);
+          const __m128i k32_m02_p30 = pair_set_epi32(-cospi_2_64, cospi_30_64);
+
+          u[0] = _mm_unpacklo_epi32(lstep3[16], lstep3[30]);
+          u[1] = _mm_unpackhi_epi32(lstep3[16], lstep3[30]);
+          u[2] = _mm_unpacklo_epi32(lstep3[17], lstep3[31]);
+          u[3] = _mm_unpackhi_epi32(lstep3[17], lstep3[31]);
+          u[4] = _mm_unpacklo_epi32(lstep3[18], lstep3[28]);
+          u[5] = _mm_unpackhi_epi32(lstep3[18], lstep3[28]);
+          u[6] = _mm_unpacklo_epi32(lstep3[19], lstep3[29]);
+          u[7] = _mm_unpackhi_epi32(lstep3[19], lstep3[29]);
+          u[8] = _mm_unpacklo_epi32(lstep3[20], lstep3[26]);
+          u[9] = _mm_unpackhi_epi32(lstep3[20], lstep3[26]);
+          u[10] = _mm_unpacklo_epi32(lstep3[21], lstep3[27]);
+          u[11] = _mm_unpackhi_epi32(lstep3[21], lstep3[27]);
+          u[12] = _mm_unpacklo_epi32(lstep3[22], lstep3[24]);
+          u[13] = _mm_unpackhi_epi32(lstep3[22], lstep3[24]);
+          u[14] = _mm_unpacklo_epi32(lstep3[23], lstep3[25]);
+          u[15] = _mm_unpackhi_epi32(lstep3[23], lstep3[25]);
+
+          v[0] = k_madd_epi32(u[0], k32_p30_p02);
+          v[1] = k_madd_epi32(u[1], k32_p30_p02);
+          v[2] = k_madd_epi32(u[2], k32_p30_p02);
+          v[3] = k_madd_epi32(u[3], k32_p30_p02);
+          v[4] = k_madd_epi32(u[4], k32_p14_p18);
+          v[5] = k_madd_epi32(u[5], k32_p14_p18);
+          v[6] = k_madd_epi32(u[6], k32_p14_p18);
+          v[7] = k_madd_epi32(u[7], k32_p14_p18);
+          v[8] = k_madd_epi32(u[8], k32_p22_p10);
+          v[9] = k_madd_epi32(u[9], k32_p22_p10);
+          v[10] = k_madd_epi32(u[10], k32_p22_p10);
+          v[11] = k_madd_epi32(u[11], k32_p22_p10);
+          v[12] = k_madd_epi32(u[12], k32_p06_p26);
+          v[13] = k_madd_epi32(u[13], k32_p06_p26);
+          v[14] = k_madd_epi32(u[14], k32_p06_p26);
+          v[15] = k_madd_epi32(u[15], k32_p06_p26);
+          v[16] = k_madd_epi32(u[12], k32_m26_p06);
+          v[17] = k_madd_epi32(u[13], k32_m26_p06);
+          v[18] = k_madd_epi32(u[14], k32_m26_p06);
+          v[19] = k_madd_epi32(u[15], k32_m26_p06);
+          v[20] = k_madd_epi32(u[8], k32_m10_p22);
+          v[21] = k_madd_epi32(u[9], k32_m10_p22);
+          v[22] = k_madd_epi32(u[10], k32_m10_p22);
+          v[23] = k_madd_epi32(u[11], k32_m10_p22);
+          v[24] = k_madd_epi32(u[4], k32_m18_p14);
+          v[25] = k_madd_epi32(u[5], k32_m18_p14);
+          v[26] = k_madd_epi32(u[6], k32_m18_p14);
+          v[27] = k_madd_epi32(u[7], k32_m18_p14);
+          v[28] = k_madd_epi32(u[0], k32_m02_p30);
+          v[29] = k_madd_epi32(u[1], k32_m02_p30);
+          v[30] = k_madd_epi32(u[2], k32_m02_p30);
+          v[31] = k_madd_epi32(u[3], k32_m02_p30);
+
+#if DCT_HIGH_BIT_DEPTH
+          overflow = k_check_epi32_overflow_32(
+              &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+              &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
+              &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
+              &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+          u[0] = k_packs_epi64(v[0], v[1]);
+          u[1] = k_packs_epi64(v[2], v[3]);
+          u[2] = k_packs_epi64(v[4], v[5]);
+          u[3] = k_packs_epi64(v[6], v[7]);
+          u[4] = k_packs_epi64(v[8], v[9]);
+          u[5] = k_packs_epi64(v[10], v[11]);
+          u[6] = k_packs_epi64(v[12], v[13]);
+          u[7] = k_packs_epi64(v[14], v[15]);
+          u[8] = k_packs_epi64(v[16], v[17]);
+          u[9] = k_packs_epi64(v[18], v[19]);
+          u[10] = k_packs_epi64(v[20], v[21]);
+          u[11] = k_packs_epi64(v[22], v[23]);
+          u[12] = k_packs_epi64(v[24], v[25]);
+          u[13] = k_packs_epi64(v[26], v[27]);
+          u[14] = k_packs_epi64(v[28], v[29]);
+          u[15] = k_packs_epi64(v[30], v[31]);
+
+          v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+          v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+          v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+          v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+          v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+          v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+          v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+          v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+          v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+          v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+          v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+          v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+          v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+          v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+          v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+          v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+          u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+          u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+          u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+          u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+          u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+          u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+          u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+          u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+          u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+          u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+          u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+          u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+          u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+          u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+          u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+          u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+
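+          // Same (x + 1 + (x < 0)) >> 2 rounding as above, over sixteen lanes.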
+          v[0] = _mm_cmplt_epi32(u[0], kZero);
+          v[1] = _mm_cmplt_epi32(u[1], kZero);
+          v[2] = _mm_cmplt_epi32(u[2], kZero);
+          v[3] = _mm_cmplt_epi32(u[3], kZero);
+          v[4] = _mm_cmplt_epi32(u[4], kZero);
+          v[5] = _mm_cmplt_epi32(u[5], kZero);
+          v[6] = _mm_cmplt_epi32(u[6], kZero);
+          v[7] = _mm_cmplt_epi32(u[7], kZero);
+          v[8] = _mm_cmplt_epi32(u[8], kZero);
+          v[9] = _mm_cmplt_epi32(u[9], kZero);
+          v[10] = _mm_cmplt_epi32(u[10], kZero);
+          v[11] = _mm_cmplt_epi32(u[11], kZero);
+          v[12] = _mm_cmplt_epi32(u[12], kZero);
+          v[13] = _mm_cmplt_epi32(u[13], kZero);
+          v[14] = _mm_cmplt_epi32(u[14], kZero);
+          v[15] = _mm_cmplt_epi32(u[15], kZero);
+
+          u[0] = _mm_sub_epi32(u[0], v[0]);
+          u[1] = _mm_sub_epi32(u[1], v[1]);
+          u[2] = _mm_sub_epi32(u[2], v[2]);
+          u[3] = _mm_sub_epi32(u[3], v[3]);
+          u[4] = _mm_sub_epi32(u[4], v[4]);
+          u[5] = _mm_sub_epi32(u[5], v[5]);
+          u[6] = _mm_sub_epi32(u[6], v[6]);
+          u[7] = _mm_sub_epi32(u[7], v[7]);
+          u[8] = _mm_sub_epi32(u[8], v[8]);
+          u[9] = _mm_sub_epi32(u[9], v[9]);
+          u[10] = _mm_sub_epi32(u[10], v[10]);
+          u[11] = _mm_sub_epi32(u[11], v[11]);
+          u[12] = _mm_sub_epi32(u[12], v[12]);
+          u[13] = _mm_sub_epi32(u[13], v[13]);
+          u[14] = _mm_sub_epi32(u[14], v[14]);
+          u[15] = _mm_sub_epi32(u[15], v[15]);
+
+          v[0] = _mm_add_epi32(u[0], K32One);
+          v[1] = _mm_add_epi32(u[1], K32One);
+          v[2] = _mm_add_epi32(u[2], K32One);
+          v[3] = _mm_add_epi32(u[3], K32One);
+          v[4] = _mm_add_epi32(u[4], K32One);
+          v[5] = _mm_add_epi32(u[5], K32One);
+          v[6] = _mm_add_epi32(u[6], K32One);
+          v[7] = _mm_add_epi32(u[7], K32One);
+          v[8] = _mm_add_epi32(u[8], K32One);
+          v[9] = _mm_add_epi32(u[9], K32One);
+          v[10] = _mm_add_epi32(u[10], K32One);
+          v[11] = _mm_add_epi32(u[11], K32One);
+          v[12] = _mm_add_epi32(u[12], K32One);
+          v[13] = _mm_add_epi32(u[13], K32One);
+          v[14] = _mm_add_epi32(u[14], K32One);
+          v[15] = _mm_add_epi32(u[15], K32One);
+
+          u[0] = _mm_srai_epi32(v[0], 2);
+          u[1] = _mm_srai_epi32(v[1], 2);
+          u[2] = _mm_srai_epi32(v[2], 2);
+          u[3] = _mm_srai_epi32(v[3], 2);
+          u[4] = _mm_srai_epi32(v[4], 2);
+          u[5] = _mm_srai_epi32(v[5], 2);
+          u[6] = _mm_srai_epi32(v[6], 2);
+          u[7] = _mm_srai_epi32(v[7], 2);
+          u[8] = _mm_srai_epi32(v[8], 2);
+          u[9] = _mm_srai_epi32(v[9], 2);
+          u[10] = _mm_srai_epi32(v[10], 2);
+          u[11] = _mm_srai_epi32(v[11], 2);
+          u[12] = _mm_srai_epi32(v[12], 2);
+          u[13] = _mm_srai_epi32(v[13], 2);
+          u[14] = _mm_srai_epi32(v[14], 2);
+          u[15] = _mm_srai_epi32(v[15], 2);
+
+          out[2] = _mm_packs_epi32(u[0], u[1]);
+          out[18] = _mm_packs_epi32(u[2], u[3]);
+          out[10] = _mm_packs_epi32(u[4], u[5]);
+          out[26] = _mm_packs_epi32(u[6], u[7]);
+          out[6] = _mm_packs_epi32(u[8], u[9]);
+          out[22] = _mm_packs_epi32(u[10], u[11]);
+          out[14] = _mm_packs_epi32(u[12], u[13]);
+          out[30] = _mm_packs_epi32(u[14], u[15]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x8(&out[2], &out[18], &out[10], &out[26],
+                                      &out[6], &out[22], &out[14], &out[30]);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          lstep1[32] = _mm_add_epi32(lstep3[34], lstep2[32]);
+          lstep1[33] = _mm_add_epi32(lstep3[35], lstep2[33]);
+          lstep1[34] = _mm_sub_epi32(lstep2[32], lstep3[34]);
+          lstep1[35] = _mm_sub_epi32(lstep2[33], lstep3[35]);
+          lstep1[36] = _mm_sub_epi32(lstep2[38], lstep3[36]);
+          lstep1[37] = _mm_sub_epi32(lstep2[39], lstep3[37]);
+          lstep1[38] = _mm_add_epi32(lstep3[36], lstep2[38]);
+          lstep1[39] = _mm_add_epi32(lstep3[37], lstep2[39]);
+          lstep1[40] = _mm_add_epi32(lstep3[42], lstep2[40]);
+          lstep1[41] = _mm_add_epi32(lstep3[43], lstep2[41]);
+          lstep1[42] = _mm_sub_epi32(lstep2[40], lstep3[42]);
+          lstep1[43] = _mm_sub_epi32(lstep2[41], lstep3[43]);
+          lstep1[44] = _mm_sub_epi32(lstep2[46], lstep3[44]);
+          lstep1[45] = _mm_sub_epi32(lstep2[47], lstep3[45]);
+          lstep1[46] = _mm_add_epi32(lstep3[44], lstep2[46]);
+          lstep1[47] = _mm_add_epi32(lstep3[45], lstep2[47]);
+          lstep1[48] = _mm_add_epi32(lstep3[50], lstep2[48]);
+          lstep1[49] = _mm_add_epi32(lstep3[51], lstep2[49]);
+          lstep1[50] = _mm_sub_epi32(lstep2[48], lstep3[50]);
+          lstep1[51] = _mm_sub_epi32(lstep2[49], lstep3[51]);
+          lstep1[52] = _mm_sub_epi32(lstep2[54], lstep3[52]);
+          lstep1[53] = _mm_sub_epi32(lstep2[55], lstep3[53]);
+          lstep1[54] = _mm_add_epi32(lstep3[52], lstep2[54]);
+          lstep1[55] = _mm_add_epi32(lstep3[53], lstep2[55]);
+          lstep1[56] = _mm_add_epi32(lstep3[58], lstep2[56]);
+          lstep1[57] = _mm_add_epi32(lstep3[59], lstep2[57]);
+          lstep1[58] = _mm_sub_epi32(lstep2[56], lstep3[58]);
+          lstep1[59] = _mm_sub_epi32(lstep2[57], lstep3[59]);
+          lstep1[60] = _mm_sub_epi32(lstep2[62], lstep3[60]);
+          lstep1[61] = _mm_sub_epi32(lstep2[63], lstep3[61]);
+          lstep1[62] = _mm_add_epi32(lstep3[60], lstep2[62]);
+          lstep1[63] = _mm_add_epi32(lstep3[61], lstep2[63]);
+        }
+        // stage 8
+        {
+          const __m128i k32_p31_p01 = pair_set_epi32(cospi_31_64, cospi_1_64);
+          const __m128i k32_p15_p17 = pair_set_epi32(cospi_15_64, cospi_17_64);
+          const __m128i k32_p23_p09 = pair_set_epi32(cospi_23_64, cospi_9_64);
+          const __m128i k32_p07_p25 = pair_set_epi32(cospi_7_64, cospi_25_64);
+          const __m128i k32_m25_p07 = pair_set_epi32(-cospi_25_64, cospi_7_64);
+          const __m128i k32_m09_p23 = pair_set_epi32(-cospi_9_64, cospi_23_64);
+          const __m128i k32_m17_p15 = pair_set_epi32(-cospi_17_64, cospi_15_64);
+          const __m128i k32_m01_p31 = pair_set_epi32(-cospi_1_64, cospi_31_64);
+
+          u[0] = _mm_unpacklo_epi32(lstep1[32], lstep1[62]);
+          u[1] = _mm_unpackhi_epi32(lstep1[32], lstep1[62]);
+          u[2] = _mm_unpacklo_epi32(lstep1[33], lstep1[63]);
+          u[3] = _mm_unpackhi_epi32(lstep1[33], lstep1[63]);
+          u[4] = _mm_unpacklo_epi32(lstep1[34], lstep1[60]);
+          u[5] = _mm_unpackhi_epi32(lstep1[34], lstep1[60]);
+          u[6] = _mm_unpacklo_epi32(lstep1[35], lstep1[61]);
+          u[7] = _mm_unpackhi_epi32(lstep1[35], lstep1[61]);
+          u[8] = _mm_unpacklo_epi32(lstep1[36], lstep1[58]);
+          u[9] = _mm_unpackhi_epi32(lstep1[36], lstep1[58]);
+          u[10] = _mm_unpacklo_epi32(lstep1[37], lstep1[59]);
+          u[11] = _mm_unpackhi_epi32(lstep1[37], lstep1[59]);
+          u[12] = _mm_unpacklo_epi32(lstep1[38], lstep1[56]);
+          u[13] = _mm_unpackhi_epi32(lstep1[38], lstep1[56]);
+          u[14] = _mm_unpacklo_epi32(lstep1[39], lstep1[57]);
+          u[15] = _mm_unpackhi_epi32(lstep1[39], lstep1[57]);
+
+          v[0] = k_madd_epi32(u[0], k32_p31_p01);
+          v[1] = k_madd_epi32(u[1], k32_p31_p01);
+          v[2] = k_madd_epi32(u[2], k32_p31_p01);
+          v[3] = k_madd_epi32(u[3], k32_p31_p01);
+          v[4] = k_madd_epi32(u[4], k32_p15_p17);
+          v[5] = k_madd_epi32(u[5], k32_p15_p17);
+          v[6] = k_madd_epi32(u[6], k32_p15_p17);
+          v[7] = k_madd_epi32(u[7], k32_p15_p17);
+          v[8] = k_madd_epi32(u[8], k32_p23_p09);
+          v[9] = k_madd_epi32(u[9], k32_p23_p09);
+          v[10] = k_madd_epi32(u[10], k32_p23_p09);
+          v[11] = k_madd_epi32(u[11], k32_p23_p09);
+          v[12] = k_madd_epi32(u[12], k32_p07_p25);
+          v[13] = k_madd_epi32(u[13], k32_p07_p25);
+          v[14] = k_madd_epi32(u[14], k32_p07_p25);
+          v[15] = k_madd_epi32(u[15], k32_p07_p25);
+          v[16] = k_madd_epi32(u[12], k32_m25_p07);
+          v[17] = k_madd_epi32(u[13], k32_m25_p07);
+          v[18] = k_madd_epi32(u[14], k32_m25_p07);
+          v[19] = k_madd_epi32(u[15], k32_m25_p07);
+          v[20] = k_madd_epi32(u[8], k32_m09_p23);
+          v[21] = k_madd_epi32(u[9], k32_m09_p23);
+          v[22] = k_madd_epi32(u[10], k32_m09_p23);
+          v[23] = k_madd_epi32(u[11], k32_m09_p23);
+          v[24] = k_madd_epi32(u[4], k32_m17_p15);
+          v[25] = k_madd_epi32(u[5], k32_m17_p15);
+          v[26] = k_madd_epi32(u[6], k32_m17_p15);
+          v[27] = k_madd_epi32(u[7], k32_m17_p15);
+          v[28] = k_madd_epi32(u[0], k32_m01_p31);
+          v[29] = k_madd_epi32(u[1], k32_m01_p31);
+          v[30] = k_madd_epi32(u[2], k32_m01_p31);
+          v[31] = k_madd_epi32(u[3], k32_m01_p31);
+
+#if DCT_HIGH_BIT_DEPTH
+          overflow = k_check_epi32_overflow_32(
+              &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+              &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
+              &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
+              &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+          u[0] = k_packs_epi64(v[0], v[1]);
+          u[1] = k_packs_epi64(v[2], v[3]);
+          u[2] = k_packs_epi64(v[4], v[5]);
+          u[3] = k_packs_epi64(v[6], v[7]);
+          u[4] = k_packs_epi64(v[8], v[9]);
+          u[5] = k_packs_epi64(v[10], v[11]);
+          u[6] = k_packs_epi64(v[12], v[13]);
+          u[7] = k_packs_epi64(v[14], v[15]);
+          u[8] = k_packs_epi64(v[16], v[17]);
+          u[9] = k_packs_epi64(v[18], v[19]);
+          u[10] = k_packs_epi64(v[20], v[21]);
+          u[11] = k_packs_epi64(v[22], v[23]);
+          u[12] = k_packs_epi64(v[24], v[25]);
+          u[13] = k_packs_epi64(v[26], v[27]);
+          u[14] = k_packs_epi64(v[28], v[29]);
+          u[15] = k_packs_epi64(v[30], v[31]);
+
+          v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+          v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+          v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+          v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+          v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+          v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+          v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+          v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+          v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+          v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+          v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+          v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+          v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+          v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+          v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+          v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+          u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+          u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+          u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+          u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+          u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+          u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+          u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+          u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+          u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+          u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+          u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+          u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+          u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+          u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+          u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+          u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+
+          v[0] = _mm_cmplt_epi32(u[0], kZero);
+          v[1] = _mm_cmplt_epi32(u[1], kZero);
+          v[2] = _mm_cmplt_epi32(u[2], kZero);
+          v[3] = _mm_cmplt_epi32(u[3], kZero);
+          v[4] = _mm_cmplt_epi32(u[4], kZero);
+          v[5] = _mm_cmplt_epi32(u[5], kZero);
+          v[6] = _mm_cmplt_epi32(u[6], kZero);
+          v[7] = _mm_cmplt_epi32(u[7], kZero);
+          v[8] = _mm_cmplt_epi32(u[8], kZero);
+          v[9] = _mm_cmplt_epi32(u[9], kZero);
+          v[10] = _mm_cmplt_epi32(u[10], kZero);
+          v[11] = _mm_cmplt_epi32(u[11], kZero);
+          v[12] = _mm_cmplt_epi32(u[12], kZero);
+          v[13] = _mm_cmplt_epi32(u[13], kZero);
+          v[14] = _mm_cmplt_epi32(u[14], kZero);
+          v[15] = _mm_cmplt_epi32(u[15], kZero);
+
+          u[0] = _mm_sub_epi32(u[0], v[0]);
+          u[1] = _mm_sub_epi32(u[1], v[1]);
+          u[2] = _mm_sub_epi32(u[2], v[2]);
+          u[3] = _mm_sub_epi32(u[3], v[3]);
+          u[4] = _mm_sub_epi32(u[4], v[4]);
+          u[5] = _mm_sub_epi32(u[5], v[5]);
+          u[6] = _mm_sub_epi32(u[6], v[6]);
+          u[7] = _mm_sub_epi32(u[7], v[7]);
+          u[8] = _mm_sub_epi32(u[8], v[8]);
+          u[9] = _mm_sub_epi32(u[9], v[9]);
+          u[10] = _mm_sub_epi32(u[10], v[10]);
+          u[11] = _mm_sub_epi32(u[11], v[11]);
+          u[12] = _mm_sub_epi32(u[12], v[12]);
+          u[13] = _mm_sub_epi32(u[13], v[13]);
+          u[14] = _mm_sub_epi32(u[14], v[14]);
+          u[15] = _mm_sub_epi32(u[15], v[15]);
+
+          v[0] = _mm_add_epi32(u[0], K32One);
+          v[1] = _mm_add_epi32(u[1], K32One);
+          v[2] = _mm_add_epi32(u[2], K32One);
+          v[3] = _mm_add_epi32(u[3], K32One);
+          v[4] = _mm_add_epi32(u[4], K32One);
+          v[5] = _mm_add_epi32(u[5], K32One);
+          v[6] = _mm_add_epi32(u[6], K32One);
+          v[7] = _mm_add_epi32(u[7], K32One);
+          v[8] = _mm_add_epi32(u[8], K32One);
+          v[9] = _mm_add_epi32(u[9], K32One);
+          v[10] = _mm_add_epi32(u[10], K32One);
+          v[11] = _mm_add_epi32(u[11], K32One);
+          v[12] = _mm_add_epi32(u[12], K32One);
+          v[13] = _mm_add_epi32(u[13], K32One);
+          v[14] = _mm_add_epi32(u[14], K32One);
+          v[15] = _mm_add_epi32(u[15], K32One);
+
+          u[0] = _mm_srai_epi32(v[0], 2);
+          u[1] = _mm_srai_epi32(v[1], 2);
+          u[2] = _mm_srai_epi32(v[2], 2);
+          u[3] = _mm_srai_epi32(v[3], 2);
+          u[4] = _mm_srai_epi32(v[4], 2);
+          u[5] = _mm_srai_epi32(v[5], 2);
+          u[6] = _mm_srai_epi32(v[6], 2);
+          u[7] = _mm_srai_epi32(v[7], 2);
+          u[8] = _mm_srai_epi32(v[8], 2);
+          u[9] = _mm_srai_epi32(v[9], 2);
+          u[10] = _mm_srai_epi32(v[10], 2);
+          u[11] = _mm_srai_epi32(v[11], 2);
+          u[12] = _mm_srai_epi32(v[12], 2);
+          u[13] = _mm_srai_epi32(v[13], 2);
+          u[14] = _mm_srai_epi32(v[14], 2);
+          u[15] = _mm_srai_epi32(v[15], 2);
+
+          out[1] = _mm_packs_epi32(u[0], u[1]);
+          out[17] = _mm_packs_epi32(u[2], u[3]);
+          out[9] = _mm_packs_epi32(u[4], u[5]);
+          out[25] = _mm_packs_epi32(u[6], u[7]);
+          out[7] = _mm_packs_epi32(u[8], u[9]);
+          out[23] = _mm_packs_epi32(u[10], u[11]);
+          out[15] = _mm_packs_epi32(u[12], u[13]);
+          out[31] = _mm_packs_epi32(u[14], u[15]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x8(&out[1], &out[17], &out[9], &out[25],
+                                      &out[7], &out[23], &out[15], &out[31]);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          const __m128i k32_p27_p05 = pair_set_epi32(cospi_27_64, cospi_5_64);
+          const __m128i k32_p11_p21 = pair_set_epi32(cospi_11_64, cospi_21_64);
+          const __m128i k32_p19_p13 = pair_set_epi32(cospi_19_64, cospi_13_64);
+          const __m128i k32_p03_p29 = pair_set_epi32(cospi_3_64, cospi_29_64);
+          const __m128i k32_m29_p03 = pair_set_epi32(-cospi_29_64, cospi_3_64);
+          const __m128i k32_m13_p19 = pair_set_epi32(-cospi_13_64, cospi_19_64);
+          const __m128i k32_m21_p11 = pair_set_epi32(-cospi_21_64, cospi_11_64);
+          const __m128i k32_m05_p27 = pair_set_epi32(-cospi_5_64, cospi_27_64);
+
+          u[0] = _mm_unpacklo_epi32(lstep1[40], lstep1[54]);
+          u[1] = _mm_unpackhi_epi32(lstep1[40], lstep1[54]);
+          u[2] = _mm_unpacklo_epi32(lstep1[41], lstep1[55]);
+          u[3] = _mm_unpackhi_epi32(lstep1[41], lstep1[55]);
+          u[4] = _mm_unpacklo_epi32(lstep1[42], lstep1[52]);
+          u[5] = _mm_unpackhi_epi32(lstep1[42], lstep1[52]);
+          u[6] = _mm_unpacklo_epi32(lstep1[43], lstep1[53]);
+          u[7] = _mm_unpackhi_epi32(lstep1[43], lstep1[53]);
+          u[8] = _mm_unpacklo_epi32(lstep1[44], lstep1[50]);
+          u[9] = _mm_unpackhi_epi32(lstep1[44], lstep1[50]);
+          u[10] = _mm_unpacklo_epi32(lstep1[45], lstep1[51]);
+          u[11] = _mm_unpackhi_epi32(lstep1[45], lstep1[51]);
+          u[12] = _mm_unpacklo_epi32(lstep1[46], lstep1[48]);
+          u[13] = _mm_unpackhi_epi32(lstep1[46], lstep1[48]);
+          u[14] = _mm_unpacklo_epi32(lstep1[47], lstep1[49]);
+          u[15] = _mm_unpackhi_epi32(lstep1[47], lstep1[49]);
+
+          v[0] = k_madd_epi32(u[0], k32_p27_p05);
+          v[1] = k_madd_epi32(u[1], k32_p27_p05);
+          v[2] = k_madd_epi32(u[2], k32_p27_p05);
+          v[3] = k_madd_epi32(u[3], k32_p27_p05);
+          v[4] = k_madd_epi32(u[4], k32_p11_p21);
+          v[5] = k_madd_epi32(u[5], k32_p11_p21);
+          v[6] = k_madd_epi32(u[6], k32_p11_p21);
+          v[7] = k_madd_epi32(u[7], k32_p11_p21);
+          v[8] = k_madd_epi32(u[8], k32_p19_p13);
+          v[9] = k_madd_epi32(u[9], k32_p19_p13);
+          v[10] = k_madd_epi32(u[10], k32_p19_p13);
+          v[11] = k_madd_epi32(u[11], k32_p19_p13);
+          v[12] = k_madd_epi32(u[12], k32_p03_p29);
+          v[13] = k_madd_epi32(u[13], k32_p03_p29);
+          v[14] = k_madd_epi32(u[14], k32_p03_p29);
+          v[15] = k_madd_epi32(u[15], k32_p03_p29);
+          v[16] = k_madd_epi32(u[12], k32_m29_p03);
+          v[17] = k_madd_epi32(u[13], k32_m29_p03);
+          v[18] = k_madd_epi32(u[14], k32_m29_p03);
+          v[19] = k_madd_epi32(u[15], k32_m29_p03);
+          v[20] = k_madd_epi32(u[8], k32_m13_p19);
+          v[21] = k_madd_epi32(u[9], k32_m13_p19);
+          v[22] = k_madd_epi32(u[10], k32_m13_p19);
+          v[23] = k_madd_epi32(u[11], k32_m13_p19);
+          v[24] = k_madd_epi32(u[4], k32_m21_p11);
+          v[25] = k_madd_epi32(u[5], k32_m21_p11);
+          v[26] = k_madd_epi32(u[6], k32_m21_p11);
+          v[27] = k_madd_epi32(u[7], k32_m21_p11);
+          v[28] = k_madd_epi32(u[0], k32_m05_p27);
+          v[29] = k_madd_epi32(u[1], k32_m05_p27);
+          v[30] = k_madd_epi32(u[2], k32_m05_p27);
+          v[31] = k_madd_epi32(u[3], k32_m05_p27);
+
+#if DCT_HIGH_BIT_DEPTH
+          overflow = k_check_epi32_overflow_32(
+              &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+              &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
+              &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
+              &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+          u[0] = k_packs_epi64(v[0], v[1]);
+          u[1] = k_packs_epi64(v[2], v[3]);
+          u[2] = k_packs_epi64(v[4], v[5]);
+          u[3] = k_packs_epi64(v[6], v[7]);
+          u[4] = k_packs_epi64(v[8], v[9]);
+          u[5] = k_packs_epi64(v[10], v[11]);
+          u[6] = k_packs_epi64(v[12], v[13]);
+          u[7] = k_packs_epi64(v[14], v[15]);
+          u[8] = k_packs_epi64(v[16], v[17]);
+          u[9] = k_packs_epi64(v[18], v[19]);
+          u[10] = k_packs_epi64(v[20], v[21]);
+          u[11] = k_packs_epi64(v[22], v[23]);
+          u[12] = k_packs_epi64(v[24], v[25]);
+          u[13] = k_packs_epi64(v[26], v[27]);
+          u[14] = k_packs_epi64(v[28], v[29]);
+          u[15] = k_packs_epi64(v[30], v[31]);
+
+          v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+          v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+          v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+          v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+          v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+          v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+          v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+          v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+          v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+          v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+          v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+          v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+          v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+          v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+          v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+          v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+          u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+          u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+          u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+          u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+          u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+          u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+          u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+          u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+          u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+          u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+          u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+          u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+          u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+          u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+          u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+          u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+
+          v[0] = _mm_cmplt_epi32(u[0], kZero);
+          v[1] = _mm_cmplt_epi32(u[1], kZero);
+          v[2] = _mm_cmplt_epi32(u[2], kZero);
+          v[3] = _mm_cmplt_epi32(u[3], kZero);
+          v[4] = _mm_cmplt_epi32(u[4], kZero);
+          v[5] = _mm_cmplt_epi32(u[5], kZero);
+          v[6] = _mm_cmplt_epi32(u[6], kZero);
+          v[7] = _mm_cmplt_epi32(u[7], kZero);
+          v[8] = _mm_cmplt_epi32(u[8], kZero);
+          v[9] = _mm_cmplt_epi32(u[9], kZero);
+          v[10] = _mm_cmplt_epi32(u[10], kZero);
+          v[11] = _mm_cmplt_epi32(u[11], kZero);
+          v[12] = _mm_cmplt_epi32(u[12], kZero);
+          v[13] = _mm_cmplt_epi32(u[13], kZero);
+          v[14] = _mm_cmplt_epi32(u[14], kZero);
+          v[15] = _mm_cmplt_epi32(u[15], kZero);
+
+          u[0] = _mm_sub_epi32(u[0], v[0]);
+          u[1] = _mm_sub_epi32(u[1], v[1]);
+          u[2] = _mm_sub_epi32(u[2], v[2]);
+          u[3] = _mm_sub_epi32(u[3], v[3]);
+          u[4] = _mm_sub_epi32(u[4], v[4]);
+          u[5] = _mm_sub_epi32(u[5], v[5]);
+          u[6] = _mm_sub_epi32(u[6], v[6]);
+          u[7] = _mm_sub_epi32(u[7], v[7]);
+          u[8] = _mm_sub_epi32(u[8], v[8]);
+          u[9] = _mm_sub_epi32(u[9], v[9]);
+          u[10] = _mm_sub_epi32(u[10], v[10]);
+          u[11] = _mm_sub_epi32(u[11], v[11]);
+          u[12] = _mm_sub_epi32(u[12], v[12]);
+          u[13] = _mm_sub_epi32(u[13], v[13]);
+          u[14] = _mm_sub_epi32(u[14], v[14]);
+          u[15] = _mm_sub_epi32(u[15], v[15]);
+
+          v[0] = _mm_add_epi32(u[0], K32One);
+          v[1] = _mm_add_epi32(u[1], K32One);
+          v[2] = _mm_add_epi32(u[2], K32One);
+          v[3] = _mm_add_epi32(u[3], K32One);
+          v[4] = _mm_add_epi32(u[4], K32One);
+          v[5] = _mm_add_epi32(u[5], K32One);
+          v[6] = _mm_add_epi32(u[6], K32One);
+          v[7] = _mm_add_epi32(u[7], K32One);
+          v[8] = _mm_add_epi32(u[8], K32One);
+          v[9] = _mm_add_epi32(u[9], K32One);
+          v[10] = _mm_add_epi32(u[10], K32One);
+          v[11] = _mm_add_epi32(u[11], K32One);
+          v[12] = _mm_add_epi32(u[12], K32One);
+          v[13] = _mm_add_epi32(u[13], K32One);
+          v[14] = _mm_add_epi32(u[14], K32One);
+          v[15] = _mm_add_epi32(u[15], K32One);
+
+          u[0] = _mm_srai_epi32(v[0], 2);
+          u[1] = _mm_srai_epi32(v[1], 2);
+          u[2] = _mm_srai_epi32(v[2], 2);
+          u[3] = _mm_srai_epi32(v[3], 2);
+          u[4] = _mm_srai_epi32(v[4], 2);
+          u[5] = _mm_srai_epi32(v[5], 2);
+          u[6] = _mm_srai_epi32(v[6], 2);
+          u[7] = _mm_srai_epi32(v[7], 2);
+          u[8] = _mm_srai_epi32(v[8], 2);
+          u[9] = _mm_srai_epi32(v[9], 2);
+          u[10] = _mm_srai_epi32(v[10], 2);
+          u[11] = _mm_srai_epi32(v[11], 2);
+          u[12] = _mm_srai_epi32(v[12], 2);
+          u[13] = _mm_srai_epi32(v[13], 2);
+          u[14] = _mm_srai_epi32(v[14], 2);
+          u[15] = _mm_srai_epi32(v[15], 2);
+
+          out[5] = _mm_packs_epi32(u[0], u[1]);
+          out[21] = _mm_packs_epi32(u[2], u[3]);
+          out[13] = _mm_packs_epi32(u[4], u[5]);
+          out[29] = _mm_packs_epi32(u[6], u[7]);
+          out[3] = _mm_packs_epi32(u[8], u[9]);
+          out[19] = _mm_packs_epi32(u[10], u[11]);
+          out[11] = _mm_packs_epi32(u[12], u[13]);
+          out[27] = _mm_packs_epi32(u[14], u[15]);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x8(&out[5], &out[21], &out[13], &out[29],
+                                      &out[3], &out[19], &out[11], &out[27]);
+          if (overflow) {
+            HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+      }
+#endif  // FDCT32x32_HIGH_PRECISION
+      // Transpose the results, do it as four 8x8 transposes.
+      {
+        int transpose_block;
+        int16_t *output0 = &intermediate[column_start * 32];
+        tran_low_t *output1 = &output_org[column_start * 32];
+        for (transpose_block = 0; transpose_block < 4; ++transpose_block) {
+          __m128i *this_out = &out[8 * transpose_block];
+          // 00 01 02 03 04 05 06 07
+          // 10 11 12 13 14 15 16 17
+          // 20 21 22 23 24 25 26 27
+          // 30 31 32 33 34 35 36 37
+          // 40 41 42 43 44 45 46 47
+          // 50 51 52 53 54 55 56 57
+          // 60 61 62 63 64 65 66 67
+          // 70 71 72 73 74 75 76 77
+          const __m128i tr0_0 = _mm_unpacklo_epi16(this_out[0], this_out[1]);
+          const __m128i tr0_1 = _mm_unpacklo_epi16(this_out[2], this_out[3]);
+          const __m128i tr0_2 = _mm_unpackhi_epi16(this_out[0], this_out[1]);
+          const __m128i tr0_3 = _mm_unpackhi_epi16(this_out[2], this_out[3]);
+          const __m128i tr0_4 = _mm_unpacklo_epi16(this_out[4], this_out[5]);
+          const __m128i tr0_5 = _mm_unpacklo_epi16(this_out[6], this_out[7]);
+          const __m128i tr0_6 = _mm_unpackhi_epi16(this_out[4], this_out[5]);
+          const __m128i tr0_7 = _mm_unpackhi_epi16(this_out[6], this_out[7]);
+          // 00 10 01 11 02 12 03 13
+          // 20 30 21 31 22 32 23 33
+          // 04 14 05 15 06 16 07 17
+          // 24 34 25 35 26 36 27 37
+          // 40 50 41 51 42 52 43 53
+          // 60 70 61 71 62 72 63 73
+          // 44 54 45 55 46 56 47 57
+          // 64 74 65 75 66 76 67 77
+          const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+          const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+          const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+          const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+          const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+          const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+          const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+          const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+          // 00 10 20 30 01 11 21 31
+          // 40 50 60 70 41 51 61 71
+          // 02 12 22 32 03 13 23 33
+          // 42 52 62 72 43 53 63 73
+          // 04 14 24 34 05 15 25 35
+          // 44 54 64 74 45 55 65 75
+          // 06 16 26 36 07 17 27 37
+          // 46 56 66 76 47 57 67 77
+          __m128i tr2_0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
+          __m128i tr2_1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
+          __m128i tr2_2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
+          __m128i tr2_3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
+          __m128i tr2_4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
+          __m128i tr2_5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
+          __m128i tr2_6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
+          __m128i tr2_7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
+          // 00 10 20 30 40 50 60 70
+          // 01 11 21 31 41 51 61 71
+          // 02 12 22 32 42 52 62 72
+          // 03 13 23 33 43 53 63 73
+          // 04 14 24 34 44 54 64 74
+          // 05 15 25 35 45 55 65 75
+          // 06 16 26 36 46 56 66 76
+          // 07 17 27 37 47 57 67 77
+          if (pass == 0) {
+            // output[j] = (output[j] + 1 + (output[j] > 0)) >> 2;
+            // TODO(cd): see quality impact of only doing
+            //           output[j] = (output[j] + 1) >> 2;
+            //           which would remove the code between here ...
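+            // _mm_cmpgt_epi16 yields -1 (all ones) where the value is
+            // positive, so the subtraction below adds the (output[j] > 0)
+            // term before the +1 and the shift.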
+            __m128i tr2_0_0 = _mm_cmpgt_epi16(tr2_0, kZero);
+            __m128i tr2_1_0 = _mm_cmpgt_epi16(tr2_1, kZero);
+            __m128i tr2_2_0 = _mm_cmpgt_epi16(tr2_2, kZero);
+            __m128i tr2_3_0 = _mm_cmpgt_epi16(tr2_3, kZero);
+            __m128i tr2_4_0 = _mm_cmpgt_epi16(tr2_4, kZero);
+            __m128i tr2_5_0 = _mm_cmpgt_epi16(tr2_5, kZero);
+            __m128i tr2_6_0 = _mm_cmpgt_epi16(tr2_6, kZero);
+            __m128i tr2_7_0 = _mm_cmpgt_epi16(tr2_7, kZero);
+            tr2_0 = _mm_sub_epi16(tr2_0, tr2_0_0);
+            tr2_1 = _mm_sub_epi16(tr2_1, tr2_1_0);
+            tr2_2 = _mm_sub_epi16(tr2_2, tr2_2_0);
+            tr2_3 = _mm_sub_epi16(tr2_3, tr2_3_0);
+            tr2_4 = _mm_sub_epi16(tr2_4, tr2_4_0);
+            tr2_5 = _mm_sub_epi16(tr2_5, tr2_5_0);
+            tr2_6 = _mm_sub_epi16(tr2_6, tr2_6_0);
+            tr2_7 = _mm_sub_epi16(tr2_7, tr2_7_0);
+            //           ... and here.
+            //           PS: also change code in av1/encoder/dct.c
+            tr2_0 = _mm_add_epi16(tr2_0, kOne);
+            tr2_1 = _mm_add_epi16(tr2_1, kOne);
+            tr2_2 = _mm_add_epi16(tr2_2, kOne);
+            tr2_3 = _mm_add_epi16(tr2_3, kOne);
+            tr2_4 = _mm_add_epi16(tr2_4, kOne);
+            tr2_5 = _mm_add_epi16(tr2_5, kOne);
+            tr2_6 = _mm_add_epi16(tr2_6, kOne);
+            tr2_7 = _mm_add_epi16(tr2_7, kOne);
+            tr2_0 = _mm_srai_epi16(tr2_0, 2);
+            tr2_1 = _mm_srai_epi16(tr2_1, 2);
+            tr2_2 = _mm_srai_epi16(tr2_2, 2);
+            tr2_3 = _mm_srai_epi16(tr2_3, 2);
+            tr2_4 = _mm_srai_epi16(tr2_4, 2);
+            tr2_5 = _mm_srai_epi16(tr2_5, 2);
+            tr2_6 = _mm_srai_epi16(tr2_6, 2);
+            tr2_7 = _mm_srai_epi16(tr2_7, 2);
+          }
+          // Note: even though all these stores are aligned, using the aligned
+          //       intrinsics makes the code slightly slower.
+          if (pass == 0) {
+            _mm_storeu_si128((__m128i *)(output0 + 0 * 32), tr2_0);
+            _mm_storeu_si128((__m128i *)(output0 + 1 * 32), tr2_1);
+            _mm_storeu_si128((__m128i *)(output0 + 2 * 32), tr2_2);
+            _mm_storeu_si128((__m128i *)(output0 + 3 * 32), tr2_3);
+            _mm_storeu_si128((__m128i *)(output0 + 4 * 32), tr2_4);
+            _mm_storeu_si128((__m128i *)(output0 + 5 * 32), tr2_5);
+            _mm_storeu_si128((__m128i *)(output0 + 6 * 32), tr2_6);
+            _mm_storeu_si128((__m128i *)(output0 + 7 * 32), tr2_7);
+            // Process next 8x8
+            output0 += 8;
+          } else {
+            storeu_output(&tr2_0, (output1 + 0 * 32));
+            storeu_output(&tr2_1, (output1 + 1 * 32));
+            storeu_output(&tr2_2, (output1 + 2 * 32));
+            storeu_output(&tr2_3, (output1 + 3 * 32));
+            storeu_output(&tr2_4, (output1 + 4 * 32));
+            storeu_output(&tr2_5, (output1 + 5 * 32));
+            storeu_output(&tr2_6, (output1 + 6 * 32));
+            storeu_output(&tr2_7, (output1 + 7 * 32));
+            // Process next 8x8
+            output1 += 8;
+          }
+        }
+      }
+    }
+  }
+}  // NOLINT
+
+#undef ADD_EPI16
+#undef SUB_EPI16
+#undef HIGH_FDCT32x32_2D_C
+#undef HIGH_FDCT32x32_2D_ROWS_C
diff --git a/av1/common/x86/vp10_fwd_txfm1d_sse4.c b/av1/common/x86/vp10_fwd_txfm1d_sse4.c
new file mode 100644
index 0000000..902c9b2
--- /dev/null
+++ b/av1/common/x86/vp10_fwd_txfm1d_sse4.c
@@ -0,0 +1,1689 @@
+#include "av1/common/x86/vp10_txfm1d_sse4.h"
+
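+// Each transform below ping-pongs its stages between buf0 and buf1, with
+// cos_bit[] selecting the rounding precision each butterfly stage uses. The
+// btf_32_sse4_1_type0/type1 helpers (declared in vp10_txfm1d_sse4.h) apply a
+// two-point cosine rotation; roughly (assumed, up to the sign convention
+// that distinguishes the two types):
+//   out0 = round_shift(w0 * in0 + w1 * in1, bit)
+//   out1 = round_shift(w1 * in0 - w0 * in1, bit)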
+void vp10_fdct4_new_sse4_1(const __m128i *input, __m128i *output,
+                           const int8_t *cos_bit, const int8_t *stage_range) {
+  const int txfm_size = 4;
+  const int num_per_128 = 4;
+  const int32_t *cospi;
+  __m128i buf0[4];
+  __m128i buf1[4];
+  int col_num = txfm_size / num_per_128;
+  int bit;
+  int col;
+  (void)stage_range;
+  for (col = 0; col < col_num; col++) {
+    // stage 0
+    int32_t stage_idx = 0;
+    buf0[0] = input[0 * col_num + col];
+    buf0[1] = input[1 * col_num + col];
+    buf0[2] = input[2 * col_num + col];
+    buf0[3] = input[3 * col_num + col];
+
+    // stage 1
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[3]);
+    buf1[3] = _mm_sub_epi32(buf0[0], buf0[3]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[2]);
+    buf1[2] = _mm_sub_epi32(buf0[1], buf0[2]);
+
+    // stage 2
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[0], buf1[1], buf0[0],
+                        buf0[1], bit);
+    btf_32_sse4_1_type1(cospi[48], cospi[16], buf1[2], buf1[3], buf0[2],
+                        buf0[3], bit);
+
+    // stage 3
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[0];
+    buf1[1] = buf0[2];
+    buf1[2] = buf0[1];
+    buf1[3] = buf0[3];
+
+    output[0 * col_num + col] = buf1[0];
+    output[1 * col_num + col] = buf1[1];
+    output[2 * col_num + col] = buf1[2];
+    output[3 * col_num + col] = buf1[3];
+  }
+}
+
+void vp10_fdct8_new_sse4_1(const __m128i *input, __m128i *output,
+                           const int8_t *cos_bit, const int8_t *stage_range) {
+  const int txfm_size = 8;
+  const int num_per_128 = 4;
+  const int32_t *cospi;
+  __m128i buf0[8];
+  __m128i buf1[8];
+  int col_num = txfm_size / num_per_128;
+  int bit;
+  int col;
+  (void)stage_range;
+  for (col = 0; col < col_num; col++) {
+    // stage 0
+    int32_t stage_idx = 0;
+    buf0[0] = input[0 * col_num + col];
+    buf0[1] = input[1 * col_num + col];
+    buf0[2] = input[2 * col_num + col];
+    buf0[3] = input[3 * col_num + col];
+    buf0[4] = input[4 * col_num + col];
+    buf0[5] = input[5 * col_num + col];
+    buf0[6] = input[6 * col_num + col];
+    buf0[7] = input[7 * col_num + col];
+
+    // stage 1
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[7]);
+    buf1[7] = _mm_sub_epi32(buf0[0], buf0[7]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[6]);
+    buf1[6] = _mm_sub_epi32(buf0[1], buf0[6]);
+    buf1[2] = _mm_add_epi32(buf0[2], buf0[5]);
+    buf1[5] = _mm_sub_epi32(buf0[2], buf0[5]);
+    buf1[3] = _mm_add_epi32(buf0[3], buf0[4]);
+    buf1[4] = _mm_sub_epi32(buf0[3], buf0[4]);
+
+    // stage 2
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = _mm_add_epi32(buf1[0], buf1[3]);
+    buf0[3] = _mm_sub_epi32(buf1[0], buf1[3]);
+    buf0[1] = _mm_add_epi32(buf1[1], buf1[2]);
+    buf0[2] = _mm_sub_epi32(buf1[1], buf1[2]);
+    buf0[4] = buf1[4];
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[5], buf1[6], buf0[5],
+                        buf0[6], bit);
+    buf0[7] = buf1[7];
+
+    // stage 3
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf0[0], buf0[1], buf1[0],
+                        buf1[1], bit);
+    btf_32_sse4_1_type1(cospi[48], cospi[16], buf0[2], buf0[3], buf1[2],
+                        buf1[3], bit);
+    buf1[4] = _mm_add_epi32(buf0[4], buf0[5]);
+    buf1[5] = _mm_sub_epi32(buf0[4], buf0[5]);
+    buf1[6] = _mm_sub_epi32(buf0[7], buf0[6]);
+    buf1[7] = _mm_add_epi32(buf0[7], buf0[6]);
+
+    // stage 4
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    buf0[2] = buf1[2];
+    buf0[3] = buf1[3];
+    btf_32_sse4_1_type1(cospi[56], cospi[8], buf1[4], buf1[7], buf0[4], buf0[7],
+                        bit);
+    btf_32_sse4_1_type1(cospi[24], cospi[40], buf1[5], buf1[6], buf0[5],
+                        buf0[6], bit);
+
+    // stage 5
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[0];
+    buf1[1] = buf0[4];
+    buf1[2] = buf0[2];
+    buf1[3] = buf0[6];
+    buf1[4] = buf0[1];
+    buf1[5] = buf0[5];
+    buf1[6] = buf0[3];
+    buf1[7] = buf0[7];
+
+    output[0 * col_num + col] = buf1[0];
+    output[1 * col_num + col] = buf1[1];
+    output[2 * col_num + col] = buf1[2];
+    output[3 * col_num + col] = buf1[3];
+    output[4 * col_num + col] = buf1[4];
+    output[5 * col_num + col] = buf1[5];
+    output[6 * col_num + col] = buf1[6];
+    output[7 * col_num + col] = buf1[7];
+  }
+}
+
+void vp10_fdct16_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range) {
+  const int txfm_size = 16;
+  const int num_per_128 = 4;
+  const int32_t *cospi;
+  __m128i buf0[16];
+  __m128i buf1[16];
+  int col_num = txfm_size / num_per_128;
+  int bit;
+  int col;
+  (void)stage_range;
+  for (col = 0; col < col_num; col++) {
+    // stage 0
+    int32_t stage_idx = 0;
+    buf0[0] = input[0 * col_num + col];
+    buf0[1] = input[1 * col_num + col];
+    buf0[2] = input[2 * col_num + col];
+    buf0[3] = input[3 * col_num + col];
+    buf0[4] = input[4 * col_num + col];
+    buf0[5] = input[5 * col_num + col];
+    buf0[6] = input[6 * col_num + col];
+    buf0[7] = input[7 * col_num + col];
+    buf0[8] = input[8 * col_num + col];
+    buf0[9] = input[9 * col_num + col];
+    buf0[10] = input[10 * col_num + col];
+    buf0[11] = input[11 * col_num + col];
+    buf0[12] = input[12 * col_num + col];
+    buf0[13] = input[13 * col_num + col];
+    buf0[14] = input[14 * col_num + col];
+    buf0[15] = input[15 * col_num + col];
+
+    // stage 1
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[15]);
+    buf1[15] = _mm_sub_epi32(buf0[0], buf0[15]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[14]);
+    buf1[14] = _mm_sub_epi32(buf0[1], buf0[14]);
+    buf1[2] = _mm_add_epi32(buf0[2], buf0[13]);
+    buf1[13] = _mm_sub_epi32(buf0[2], buf0[13]);
+    buf1[3] = _mm_add_epi32(buf0[3], buf0[12]);
+    buf1[12] = _mm_sub_epi32(buf0[3], buf0[12]);
+    buf1[4] = _mm_add_epi32(buf0[4], buf0[11]);
+    buf1[11] = _mm_sub_epi32(buf0[4], buf0[11]);
+    buf1[5] = _mm_add_epi32(buf0[5], buf0[10]);
+    buf1[10] = _mm_sub_epi32(buf0[5], buf0[10]);
+    buf1[6] = _mm_add_epi32(buf0[6], buf0[9]);
+    buf1[9] = _mm_sub_epi32(buf0[6], buf0[9]);
+    buf1[7] = _mm_add_epi32(buf0[7], buf0[8]);
+    buf1[8] = _mm_sub_epi32(buf0[7], buf0[8]);
+
+    // stage 2
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = _mm_add_epi32(buf1[0], buf1[7]);
+    buf0[7] = _mm_sub_epi32(buf1[0], buf1[7]);
+    buf0[1] = _mm_add_epi32(buf1[1], buf1[6]);
+    buf0[6] = _mm_sub_epi32(buf1[1], buf1[6]);
+    buf0[2] = _mm_add_epi32(buf1[2], buf1[5]);
+    buf0[5] = _mm_sub_epi32(buf1[2], buf1[5]);
+    buf0[3] = _mm_add_epi32(buf1[3], buf1[4]);
+    buf0[4] = _mm_sub_epi32(buf1[3], buf1[4]);
+    buf0[8] = buf1[8];
+    buf0[9] = buf1[9];
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[10], buf1[13], buf0[10],
+                        buf0[13], bit);
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[11], buf1[12], buf0[11],
+                        buf0[12], bit);
+    buf0[14] = buf1[14];
+    buf0[15] = buf1[15];
+
+    // stage 3
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[3]);
+    buf1[3] = _mm_sub_epi32(buf0[0], buf0[3]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[2]);
+    buf1[2] = _mm_sub_epi32(buf0[1], buf0[2]);
+    buf1[4] = buf0[4];
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[5], buf0[6], buf1[5],
+                        buf1[6], bit);
+    buf1[7] = buf0[7];
+    buf1[8] = _mm_add_epi32(buf0[8], buf0[11]);
+    buf1[11] = _mm_sub_epi32(buf0[8], buf0[11]);
+    buf1[9] = _mm_add_epi32(buf0[9], buf0[10]);
+    buf1[10] = _mm_sub_epi32(buf0[9], buf0[10]);
+    buf1[12] = _mm_sub_epi32(buf0[15], buf0[12]);
+    buf1[15] = _mm_add_epi32(buf0[15], buf0[12]);
+    buf1[13] = _mm_sub_epi32(buf0[14], buf0[13]);
+    buf1[14] = _mm_add_epi32(buf0[14], buf0[13]);
+
+    // stage 4
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[0], buf1[1], buf0[0],
+                        buf0[1], bit);
+    btf_32_sse4_1_type1(cospi[48], cospi[16], buf1[2], buf1[3], buf0[2],
+                        buf0[3], bit);
+    buf0[4] = _mm_add_epi32(buf1[4], buf1[5]);
+    buf0[5] = _mm_sub_epi32(buf1[4], buf1[5]);
+    buf0[6] = _mm_sub_epi32(buf1[7], buf1[6]);
+    buf0[7] = _mm_add_epi32(buf1[7], buf1[6]);
+    buf0[8] = buf1[8];
+    btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[9], buf1[14], buf0[9],
+                        buf0[14], bit);
+    btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[10], buf1[13], buf0[10],
+                        buf0[13], bit);
+    buf0[11] = buf1[11];
+    buf0[12] = buf1[12];
+    buf0[15] = buf1[15];
+
+    // stage 5
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[0];
+    buf1[1] = buf0[1];
+    buf1[2] = buf0[2];
+    buf1[3] = buf0[3];
+    btf_32_sse4_1_type1(cospi[56], cospi[8], buf0[4], buf0[7], buf1[4], buf1[7],
+                        bit);
+    btf_32_sse4_1_type1(cospi[24], cospi[40], buf0[5], buf0[6], buf1[5],
+                        buf1[6], bit);
+    buf1[8] = _mm_add_epi32(buf0[8], buf0[9]);
+    buf1[9] = _mm_sub_epi32(buf0[8], buf0[9]);
+    buf1[10] = _mm_sub_epi32(buf0[11], buf0[10]);
+    buf1[11] = _mm_add_epi32(buf0[11], buf0[10]);
+    buf1[12] = _mm_add_epi32(buf0[12], buf0[13]);
+    buf1[13] = _mm_sub_epi32(buf0[12], buf0[13]);
+    buf1[14] = _mm_sub_epi32(buf0[15], buf0[14]);
+    buf1[15] = _mm_add_epi32(buf0[15], buf0[14]);
+
+    // stage 6
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    buf0[2] = buf1[2];
+    buf0[3] = buf1[3];
+    buf0[4] = buf1[4];
+    buf0[5] = buf1[5];
+    buf0[6] = buf1[6];
+    buf0[7] = buf1[7];
+    btf_32_sse4_1_type1(cospi[60], cospi[4], buf1[8], buf1[15], buf0[8],
+                        buf0[15], bit);
+    btf_32_sse4_1_type1(cospi[28], cospi[36], buf1[9], buf1[14], buf0[9],
+                        buf0[14], bit);
+    btf_32_sse4_1_type1(cospi[44], cospi[20], buf1[10], buf1[13], buf0[10],
+                        buf0[13], bit);
+    btf_32_sse4_1_type1(cospi[12], cospi[52], buf1[11], buf1[12], buf0[11],
+                        buf0[12], bit);
+
+    // stage 7
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[0];
+    buf1[1] = buf0[8];
+    buf1[2] = buf0[4];
+    buf1[3] = buf0[12];
+    buf1[4] = buf0[2];
+    buf1[5] = buf0[10];
+    buf1[6] = buf0[6];
+    buf1[7] = buf0[14];
+    buf1[8] = buf0[1];
+    buf1[9] = buf0[9];
+    buf1[10] = buf0[5];
+    buf1[11] = buf0[13];
+    buf1[12] = buf0[3];
+    buf1[13] = buf0[11];
+    buf1[14] = buf0[7];
+    buf1[15] = buf0[15];
+
+    output[0 * col_num + col] = buf1[0];
+    output[1 * col_num + col] = buf1[1];
+    output[2 * col_num + col] = buf1[2];
+    output[3 * col_num + col] = buf1[3];
+    output[4 * col_num + col] = buf1[4];
+    output[5 * col_num + col] = buf1[5];
+    output[6 * col_num + col] = buf1[6];
+    output[7 * col_num + col] = buf1[7];
+    output[8 * col_num + col] = buf1[8];
+    output[9 * col_num + col] = buf1[9];
+    output[10 * col_num + col] = buf1[10];
+    output[11 * col_num + col] = buf1[11];
+    output[12 * col_num + col] = buf1[12];
+    output[13 * col_num + col] = buf1[13];
+    output[14 * col_num + col] = buf1[14];
+    output[15 * col_num + col] = buf1[15];
+  }
+}
+
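+// 32-point forward DCT; same ping-pong stage structure as the 8- and
+// 16-point kernels above, with the final stage again emitting coefficients
+// in bit-reversed order.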
+void vp10_fdct32_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range) {
+  const int txfm_size = 32;
+  const int num_per_128 = 4;
+  const int32_t *cospi;
+  __m128i buf0[32];
+  __m128i buf1[32];
+  int col_num = txfm_size / num_per_128;
+  int bit;
+  int col;
+  (void)stage_range;
+  for (col = 0; col < col_num; col++) {
+    // stage 0
+    int32_t stage_idx = 0;
+    buf0[0] = input[0 * col_num + col];
+    buf0[1] = input[1 * col_num + col];
+    buf0[2] = input[2 * col_num + col];
+    buf0[3] = input[3 * col_num + col];
+    buf0[4] = input[4 * col_num + col];
+    buf0[5] = input[5 * col_num + col];
+    buf0[6] = input[6 * col_num + col];
+    buf0[7] = input[7 * col_num + col];
+    buf0[8] = input[8 * col_num + col];
+    buf0[9] = input[9 * col_num + col];
+    buf0[10] = input[10 * col_num + col];
+    buf0[11] = input[11 * col_num + col];
+    buf0[12] = input[12 * col_num + col];
+    buf0[13] = input[13 * col_num + col];
+    buf0[14] = input[14 * col_num + col];
+    buf0[15] = input[15 * col_num + col];
+    buf0[16] = input[16 * col_num + col];
+    buf0[17] = input[17 * col_num + col];
+    buf0[18] = input[18 * col_num + col];
+    buf0[19] = input[19 * col_num + col];
+    buf0[20] = input[20 * col_num + col];
+    buf0[21] = input[21 * col_num + col];
+    buf0[22] = input[22 * col_num + col];
+    buf0[23] = input[23 * col_num + col];
+    buf0[24] = input[24 * col_num + col];
+    buf0[25] = input[25 * col_num + col];
+    buf0[26] = input[26 * col_num + col];
+    buf0[27] = input[27 * col_num + col];
+    buf0[28] = input[28 * col_num + col];
+    buf0[29] = input[29 * col_num + col];
+    buf0[30] = input[30 * col_num + col];
+    buf0[31] = input[31 * col_num + col];
+
+    // stage 1
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[31]);
+    buf1[31] = _mm_sub_epi32(buf0[0], buf0[31]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[30]);
+    buf1[30] = _mm_sub_epi32(buf0[1], buf0[30]);
+    buf1[2] = _mm_add_epi32(buf0[2], buf0[29]);
+    buf1[29] = _mm_sub_epi32(buf0[2], buf0[29]);
+    buf1[3] = _mm_add_epi32(buf0[3], buf0[28]);
+    buf1[28] = _mm_sub_epi32(buf0[3], buf0[28]);
+    buf1[4] = _mm_add_epi32(buf0[4], buf0[27]);
+    buf1[27] = _mm_sub_epi32(buf0[4], buf0[27]);
+    buf1[5] = _mm_add_epi32(buf0[5], buf0[26]);
+    buf1[26] = _mm_sub_epi32(buf0[5], buf0[26]);
+    buf1[6] = _mm_add_epi32(buf0[6], buf0[25]);
+    buf1[25] = _mm_sub_epi32(buf0[6], buf0[25]);
+    buf1[7] = _mm_add_epi32(buf0[7], buf0[24]);
+    buf1[24] = _mm_sub_epi32(buf0[7], buf0[24]);
+    buf1[8] = _mm_add_epi32(buf0[8], buf0[23]);
+    buf1[23] = _mm_sub_epi32(buf0[8], buf0[23]);
+    buf1[9] = _mm_add_epi32(buf0[9], buf0[22]);
+    buf1[22] = _mm_sub_epi32(buf0[9], buf0[22]);
+    buf1[10] = _mm_add_epi32(buf0[10], buf0[21]);
+    buf1[21] = _mm_sub_epi32(buf0[10], buf0[21]);
+    buf1[11] = _mm_add_epi32(buf0[11], buf0[20]);
+    buf1[20] = _mm_sub_epi32(buf0[11], buf0[20]);
+    buf1[12] = _mm_add_epi32(buf0[12], buf0[19]);
+    buf1[19] = _mm_sub_epi32(buf0[12], buf0[19]);
+    buf1[13] = _mm_add_epi32(buf0[13], buf0[18]);
+    buf1[18] = _mm_sub_epi32(buf0[13], buf0[18]);
+    buf1[14] = _mm_add_epi32(buf0[14], buf0[17]);
+    buf1[17] = _mm_sub_epi32(buf0[14], buf0[17]);
+    buf1[15] = _mm_add_epi32(buf0[15], buf0[16]);
+    buf1[16] = _mm_sub_epi32(buf0[15], buf0[16]);
+
+    // stage 2
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = _mm_add_epi32(buf1[0], buf1[15]);
+    buf0[15] = _mm_sub_epi32(buf1[0], buf1[15]);
+    buf0[1] = _mm_add_epi32(buf1[1], buf1[14]);
+    buf0[14] = _mm_sub_epi32(buf1[1], buf1[14]);
+    buf0[2] = _mm_add_epi32(buf1[2], buf1[13]);
+    buf0[13] = _mm_sub_epi32(buf1[2], buf1[13]);
+    buf0[3] = _mm_add_epi32(buf1[3], buf1[12]);
+    buf0[12] = _mm_sub_epi32(buf1[3], buf1[12]);
+    buf0[4] = _mm_add_epi32(buf1[4], buf1[11]);
+    buf0[11] = _mm_sub_epi32(buf1[4], buf1[11]);
+    buf0[5] = _mm_add_epi32(buf1[5], buf1[10]);
+    buf0[10] = _mm_sub_epi32(buf1[5], buf1[10]);
+    buf0[6] = _mm_add_epi32(buf1[6], buf1[9]);
+    buf0[9] = _mm_sub_epi32(buf1[6], buf1[9]);
+    buf0[7] = _mm_add_epi32(buf1[7], buf1[8]);
+    buf0[8] = _mm_sub_epi32(buf1[7], buf1[8]);
+    buf0[16] = buf1[16];
+    buf0[17] = buf1[17];
+    buf0[18] = buf1[18];
+    buf0[19] = buf1[19];
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[20], buf1[27], buf0[20],
+                        buf0[27], bit);
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[21], buf1[26], buf0[21],
+                        buf0[26], bit);
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[22], buf1[25], buf0[22],
+                        buf0[25], bit);
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[23], buf1[24], buf0[23],
+                        buf0[24], bit);
+    buf0[28] = buf1[28];
+    buf0[29] = buf1[29];
+    buf0[30] = buf1[30];
+    buf0[31] = buf1[31];
+
+    // stage 3
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[7]);
+    buf1[7] = _mm_sub_epi32(buf0[0], buf0[7]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[6]);
+    buf1[6] = _mm_sub_epi32(buf0[1], buf0[6]);
+    buf1[2] = _mm_add_epi32(buf0[2], buf0[5]);
+    buf1[5] = _mm_sub_epi32(buf0[2], buf0[5]);
+    buf1[3] = _mm_add_epi32(buf0[3], buf0[4]);
+    buf1[4] = _mm_sub_epi32(buf0[3], buf0[4]);
+    buf1[8] = buf0[8];
+    buf1[9] = buf0[9];
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[10], buf0[13], buf1[10],
+                        buf1[13], bit);
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[11], buf0[12], buf1[11],
+                        buf1[12], bit);
+    buf1[14] = buf0[14];
+    buf1[15] = buf0[15];
+    buf1[16] = _mm_add_epi32(buf0[16], buf0[23]);
+    buf1[23] = _mm_sub_epi32(buf0[16], buf0[23]);
+    buf1[17] = _mm_add_epi32(buf0[17], buf0[22]);
+    buf1[22] = _mm_sub_epi32(buf0[17], buf0[22]);
+    buf1[18] = _mm_add_epi32(buf0[18], buf0[21]);
+    buf1[21] = _mm_sub_epi32(buf0[18], buf0[21]);
+    buf1[19] = _mm_add_epi32(buf0[19], buf0[20]);
+    buf1[20] = _mm_sub_epi32(buf0[19], buf0[20]);
+    buf1[24] = _mm_sub_epi32(buf0[31], buf0[24]);
+    buf1[31] = _mm_add_epi32(buf0[31], buf0[24]);
+    buf1[25] = _mm_sub_epi32(buf0[30], buf0[25]);
+    buf1[30] = _mm_add_epi32(buf0[30], buf0[25]);
+    buf1[26] = _mm_sub_epi32(buf0[29], buf0[26]);
+    buf1[29] = _mm_add_epi32(buf0[29], buf0[26]);
+    buf1[27] = _mm_sub_epi32(buf0[28], buf0[27]);
+    buf1[28] = _mm_add_epi32(buf0[28], buf0[27]);
+
+    // stage 4
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = _mm_add_epi32(buf1[0], buf1[3]);
+    buf0[3] = _mm_sub_epi32(buf1[0], buf1[3]);
+    buf0[1] = _mm_add_epi32(buf1[1], buf1[2]);
+    buf0[2] = _mm_sub_epi32(buf1[1], buf1[2]);
+    buf0[4] = buf1[4];
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[5], buf1[6], buf0[5],
+                        buf0[6], bit);
+    buf0[7] = buf1[7];
+    buf0[8] = _mm_add_epi32(buf1[8], buf1[11]);
+    buf0[11] = _mm_sub_epi32(buf1[8], buf1[11]);
+    buf0[9] = _mm_add_epi32(buf1[9], buf1[10]);
+    buf0[10] = _mm_sub_epi32(buf1[9], buf1[10]);
+    buf0[12] = _mm_sub_epi32(buf1[15], buf1[12]);
+    buf0[15] = _mm_add_epi32(buf1[15], buf1[12]);
+    buf0[13] = _mm_sub_epi32(buf1[14], buf1[13]);
+    buf0[14] = _mm_add_epi32(buf1[14], buf1[13]);
+    buf0[16] = buf1[16];
+    buf0[17] = buf1[17];
+    btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[18], buf1[29], buf0[18],
+                        buf0[29], bit);
+    btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[19], buf1[28], buf0[19],
+                        buf0[28], bit);
+    btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[20], buf1[27], buf0[20],
+                        buf0[27], bit);
+    btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[21], buf1[26], buf0[21],
+                        buf0[26], bit);
+    buf0[22] = buf1[22];
+    buf0[23] = buf1[23];
+    buf0[24] = buf1[24];
+    buf0[25] = buf1[25];
+    buf0[30] = buf1[30];
+    buf0[31] = buf1[31];
+
+    // stage 5
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf0[0], buf0[1], buf1[0],
+                        buf1[1], bit);
+    btf_32_sse4_1_type1(cospi[48], cospi[16], buf0[2], buf0[3], buf1[2],
+                        buf1[3], bit);
+    buf1[4] = _mm_add_epi32(buf0[4], buf0[5]);
+    buf1[5] = _mm_sub_epi32(buf0[4], buf0[5]);
+    buf1[6] = _mm_sub_epi32(buf0[7], buf0[6]);
+    buf1[7] = _mm_add_epi32(buf0[7], buf0[6]);
+    buf1[8] = buf0[8];
+    btf_32_sse4_1_type0(-cospi[16], cospi[48], buf0[9], buf0[14], buf1[9],
+                        buf1[14], bit);
+    btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf0[10], buf0[13], buf1[10],
+                        buf1[13], bit);
+    buf1[11] = buf0[11];
+    buf1[12] = buf0[12];
+    buf1[15] = buf0[15];
+    buf1[16] = _mm_add_epi32(buf0[16], buf0[19]);
+    buf1[19] = _mm_sub_epi32(buf0[16], buf0[19]);
+    buf1[17] = _mm_add_epi32(buf0[17], buf0[18]);
+    buf1[18] = _mm_sub_epi32(buf0[17], buf0[18]);
+    buf1[20] = _mm_sub_epi32(buf0[23], buf0[20]);
+    buf1[23] = _mm_add_epi32(buf0[23], buf0[20]);
+    buf1[21] = _mm_sub_epi32(buf0[22], buf0[21]);
+    buf1[22] = _mm_add_epi32(buf0[22], buf0[21]);
+    buf1[24] = _mm_add_epi32(buf0[24], buf0[27]);
+    buf1[27] = _mm_sub_epi32(buf0[24], buf0[27]);
+    buf1[25] = _mm_add_epi32(buf0[25], buf0[26]);
+    buf1[26] = _mm_sub_epi32(buf0[25], buf0[26]);
+    buf1[28] = _mm_sub_epi32(buf0[31], buf0[28]);
+    buf1[31] = _mm_add_epi32(buf0[31], buf0[28]);
+    buf1[29] = _mm_sub_epi32(buf0[30], buf0[29]);
+    buf1[30] = _mm_add_epi32(buf0[30], buf0[29]);
+
+    // stage 6
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    buf0[2] = buf1[2];
+    buf0[3] = buf1[3];
+    btf_32_sse4_1_type1(cospi[56], cospi[8], buf1[4], buf1[7], buf0[4], buf0[7],
+                        bit);
+    btf_32_sse4_1_type1(cospi[24], cospi[40], buf1[5], buf1[6], buf0[5],
+                        buf0[6], bit);
+    buf0[8] = _mm_add_epi32(buf1[8], buf1[9]);
+    buf0[9] = _mm_sub_epi32(buf1[8], buf1[9]);
+    buf0[10] = _mm_sub_epi32(buf1[11], buf1[10]);
+    buf0[11] = _mm_add_epi32(buf1[11], buf1[10]);
+    buf0[12] = _mm_add_epi32(buf1[12], buf1[13]);
+    buf0[13] = _mm_sub_epi32(buf1[12], buf1[13]);
+    buf0[14] = _mm_sub_epi32(buf1[15], buf1[14]);
+    buf0[15] = _mm_add_epi32(buf1[15], buf1[14]);
+    buf0[16] = buf1[16];
+    btf_32_sse4_1_type0(-cospi[8], cospi[56], buf1[17], buf1[30], buf0[17],
+                        buf0[30], bit);
+    btf_32_sse4_1_type0(-cospi[56], -cospi[8], buf1[18], buf1[29], buf0[18],
+                        buf0[29], bit);
+    buf0[19] = buf1[19];
+    buf0[20] = buf1[20];
+    btf_32_sse4_1_type0(-cospi[40], cospi[24], buf1[21], buf1[26], buf0[21],
+                        buf0[26], bit);
+    btf_32_sse4_1_type0(-cospi[24], -cospi[40], buf1[22], buf1[25], buf0[22],
+                        buf0[25], bit);
+    buf0[23] = buf1[23];
+    buf0[24] = buf1[24];
+    buf0[27] = buf1[27];
+    buf0[28] = buf1[28];
+    buf0[31] = buf1[31];
+
+    // stage 7
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[0];
+    buf1[1] = buf0[1];
+    buf1[2] = buf0[2];
+    buf1[3] = buf0[3];
+    buf1[4] = buf0[4];
+    buf1[5] = buf0[5];
+    buf1[6] = buf0[6];
+    buf1[7] = buf0[7];
+    btf_32_sse4_1_type1(cospi[60], cospi[4], buf0[8], buf0[15], buf1[8],
+                        buf1[15], bit);
+    btf_32_sse4_1_type1(cospi[28], cospi[36], buf0[9], buf0[14], buf1[9],
+                        buf1[14], bit);
+    btf_32_sse4_1_type1(cospi[44], cospi[20], buf0[10], buf0[13], buf1[10],
+                        buf1[13], bit);
+    btf_32_sse4_1_type1(cospi[12], cospi[52], buf0[11], buf0[12], buf1[11],
+                        buf1[12], bit);
+    buf1[16] = _mm_add_epi32(buf0[16], buf0[17]);
+    buf1[17] = _mm_sub_epi32(buf0[16], buf0[17]);
+    buf1[18] = _mm_sub_epi32(buf0[19], buf0[18]);
+    buf1[19] = _mm_add_epi32(buf0[19], buf0[18]);
+    buf1[20] = _mm_add_epi32(buf0[20], buf0[21]);
+    buf1[21] = _mm_sub_epi32(buf0[20], buf0[21]);
+    buf1[22] = _mm_sub_epi32(buf0[23], buf0[22]);
+    buf1[23] = _mm_add_epi32(buf0[23], buf0[22]);
+    buf1[24] = _mm_add_epi32(buf0[24], buf0[25]);
+    buf1[25] = _mm_sub_epi32(buf0[24], buf0[25]);
+    buf1[26] = _mm_sub_epi32(buf0[27], buf0[26]);
+    buf1[27] = _mm_add_epi32(buf0[27], buf0[26]);
+    buf1[28] = _mm_add_epi32(buf0[28], buf0[29]);
+    buf1[29] = _mm_sub_epi32(buf0[28], buf0[29]);
+    buf1[30] = _mm_sub_epi32(buf0[31], buf0[30]);
+    buf1[31] = _mm_add_epi32(buf0[31], buf0[30]);
+
+    // stage 8
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    buf0[2] = buf1[2];
+    buf0[3] = buf1[3];
+    buf0[4] = buf1[4];
+    buf0[5] = buf1[5];
+    buf0[6] = buf1[6];
+    buf0[7] = buf1[7];
+    buf0[8] = buf1[8];
+    buf0[9] = buf1[9];
+    buf0[10] = buf1[10];
+    buf0[11] = buf1[11];
+    buf0[12] = buf1[12];
+    buf0[13] = buf1[13];
+    buf0[14] = buf1[14];
+    buf0[15] = buf1[15];
+    btf_32_sse4_1_type1(cospi[62], cospi[2], buf1[16], buf1[31], buf0[16],
+                        buf0[31], bit);
+    btf_32_sse4_1_type1(cospi[30], cospi[34], buf1[17], buf1[30], buf0[17],
+                        buf0[30], bit);
+    btf_32_sse4_1_type1(cospi[46], cospi[18], buf1[18], buf1[29], buf0[18],
+                        buf0[29], bit);
+    btf_32_sse4_1_type1(cospi[14], cospi[50], buf1[19], buf1[28], buf0[19],
+                        buf0[28], bit);
+    btf_32_sse4_1_type1(cospi[54], cospi[10], buf1[20], buf1[27], buf0[20],
+                        buf0[27], bit);
+    btf_32_sse4_1_type1(cospi[22], cospi[42], buf1[21], buf1[26], buf0[21],
+                        buf0[26], bit);
+    btf_32_sse4_1_type1(cospi[38], cospi[26], buf1[22], buf1[25], buf0[22],
+                        buf0[25], bit);
+    btf_32_sse4_1_type1(cospi[6], cospi[58], buf1[23], buf1[24], buf0[23],
+                        buf0[24], bit);
+
+    // stage 9
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[0];
+    buf1[1] = buf0[16];
+    buf1[2] = buf0[8];
+    buf1[3] = buf0[24];
+    buf1[4] = buf0[4];
+    buf1[5] = buf0[20];
+    buf1[6] = buf0[12];
+    buf1[7] = buf0[28];
+    buf1[8] = buf0[2];
+    buf1[9] = buf0[18];
+    buf1[10] = buf0[10];
+    buf1[11] = buf0[26];
+    buf1[12] = buf0[6];
+    buf1[13] = buf0[22];
+    buf1[14] = buf0[14];
+    buf1[15] = buf0[30];
+    buf1[16] = buf0[1];
+    buf1[17] = buf0[17];
+    buf1[18] = buf0[9];
+    buf1[19] = buf0[25];
+    buf1[20] = buf0[5];
+    buf1[21] = buf0[21];
+    buf1[22] = buf0[13];
+    buf1[23] = buf0[29];
+    buf1[24] = buf0[3];
+    buf1[25] = buf0[19];
+    buf1[26] = buf0[11];
+    buf1[27] = buf0[27];
+    buf1[28] = buf0[7];
+    buf1[29] = buf0[23];
+    buf1[30] = buf0[15];
+    buf1[31] = buf0[31];
+
+    output[0 * col_num + col] = buf1[0];
+    output[1 * col_num + col] = buf1[1];
+    output[2 * col_num + col] = buf1[2];
+    output[3 * col_num + col] = buf1[3];
+    output[4 * col_num + col] = buf1[4];
+    output[5 * col_num + col] = buf1[5];
+    output[6 * col_num + col] = buf1[6];
+    output[7 * col_num + col] = buf1[7];
+    output[8 * col_num + col] = buf1[8];
+    output[9 * col_num + col] = buf1[9];
+    output[10 * col_num + col] = buf1[10];
+    output[11 * col_num + col] = buf1[11];
+    output[12 * col_num + col] = buf1[12];
+    output[13 * col_num + col] = buf1[13];
+    output[14 * col_num + col] = buf1[14];
+    output[15 * col_num + col] = buf1[15];
+    output[16 * col_num + col] = buf1[16];
+    output[17 * col_num + col] = buf1[17];
+    output[18 * col_num + col] = buf1[18];
+    output[19 * col_num + col] = buf1[19];
+    output[20 * col_num + col] = buf1[20];
+    output[21 * col_num + col] = buf1[21];
+    output[22 * col_num + col] = buf1[22];
+    output[23 * col_num + col] = buf1[23];
+    output[24 * col_num + col] = buf1[24];
+    output[25 * col_num + col] = buf1[25];
+    output[26 * col_num + col] = buf1[26];
+    output[27 * col_num + col] = buf1[27];
+    output[28 * col_num + col] = buf1[28];
+    output[29 * col_num + col] = buf1[29];
+    output[30 * col_num + col] = buf1[30];
+    output[31 * col_num + col] = buf1[31];
+  }
+}
+
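+// 4-point forward ADST. The final stage reorders the results and negates
+// the odd outputs, computed here as 0 - x with _mm_sub_epi32.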
+void vp10_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range) {
+  const int txfm_size = 4;
+  const int num_per_128 = 4;
+  const int32_t *cospi;
+  __m128i buf0[4];
+  __m128i buf1[4];
+  int col_num = txfm_size / num_per_128;
+  int bit;
+  int col;
+  (void)stage_range;
+  for (col = 0; col < col_num; col++) {
+    // stage 0
+    int32_t stage_idx = 0;
+    buf0[0] = input[0 * col_num + col];
+    buf0[1] = input[1 * col_num + col];
+    buf0[2] = input[2 * col_num + col];
+    buf0[3] = input[3 * col_num + col];
+
+    // stage 1
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[3];
+    buf1[1] = buf0[0];
+    buf1[2] = buf0[1];
+    buf1[3] = buf0[2];
+
+    // stage 2
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[0], buf1[1], buf0[0], buf0[1],
+                        bit);
+    btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[2], buf1[3], buf0[2],
+                        buf0[3], bit);
+
+    // stage 3
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
+    buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
+    buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
+
+    // stage 4
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
+                        buf0[3], bit);
+
+    // stage 5
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[0];
+    buf1[1] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[2]);
+    buf1[2] = buf0[3];
+    buf1[3] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[1]);
+
+    output[0 * col_num + col] = buf1[0];
+    output[1 * col_num + col] = buf1[1];
+    output[2 * col_num + col] = buf1[2];
+    output[3 * col_num + col] = buf1[3];
+  }
+}
+
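+// 8-point forward ADST, built from the same staged butterflies; the odd
+// outputs are negated in the final reordering stage.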
+void vp10_fadst8_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range) {
+  const int txfm_size = 8;
+  const int num_per_128 = 4;
+  const int32_t *cospi;
+  __m128i buf0[8];
+  __m128i buf1[8];
+  int col_num = txfm_size / num_per_128;
+  int bit;
+  int col;
+  (void)stage_range;
+  for (col = 0; col < col_num; col++) {
+    // stage 0
+    int32_t stage_idx = 0;
+    buf0[0] = input[0 * col_num + col];
+    buf0[1] = input[1 * col_num + col];
+    buf0[2] = input[2 * col_num + col];
+    buf0[3] = input[3 * col_num + col];
+    buf0[4] = input[4 * col_num + col];
+    buf0[5] = input[5 * col_num + col];
+    buf0[6] = input[6 * col_num + col];
+    buf0[7] = input[7 * col_num + col];
+
+    // stage 1
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[7];
+    buf1[1] = buf0[0];
+    buf1[2] = buf0[5];
+    buf1[3] = buf0[2];
+    buf1[4] = buf0[3];
+    buf1[5] = buf0[4];
+    buf1[6] = buf0[1];
+    buf1[7] = buf0[6];
+
+    // stage 2
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    btf_32_sse4_1_type0(cospi[4], cospi[60], buf1[0], buf1[1], buf0[0], buf0[1],
+                        bit);
+    btf_32_sse4_1_type0(cospi[20], cospi[44], buf1[2], buf1[3], buf0[2],
+                        buf0[3], bit);
+    btf_32_sse4_1_type0(cospi[36], cospi[28], buf1[4], buf1[5], buf0[4],
+                        buf0[5], bit);
+    btf_32_sse4_1_type0(cospi[52], cospi[12], buf1[6], buf1[7], buf0[6],
+                        buf0[7], bit);
+
+    // stage 3
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[4]);
+    buf1[4] = _mm_sub_epi32(buf0[0], buf0[4]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[5]);
+    buf1[5] = _mm_sub_epi32(buf0[1], buf0[5]);
+    buf1[2] = _mm_add_epi32(buf0[2], buf0[6]);
+    buf1[6] = _mm_sub_epi32(buf0[2], buf0[6]);
+    buf1[3] = _mm_add_epi32(buf0[3], buf0[7]);
+    buf1[7] = _mm_sub_epi32(buf0[3], buf0[7]);
+
+    // stage 4
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    buf0[2] = buf1[2];
+    buf0[3] = buf1[3];
+    btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[4], buf1[5], buf0[4],
+                        buf0[5], bit);
+    btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[6], buf1[7], buf0[6],
+                        buf0[7], bit);
+
+    // stage 5
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
+    buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
+    buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
+    buf1[4] = _mm_add_epi32(buf0[4], buf0[6]);
+    buf1[6] = _mm_sub_epi32(buf0[4], buf0[6]);
+    buf1[5] = _mm_add_epi32(buf0[5], buf0[7]);
+    buf1[7] = _mm_sub_epi32(buf0[5], buf0[7]);
+
+    // stage 6
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
+                        buf0[3], bit);
+    buf0[4] = buf1[4];
+    buf0[5] = buf1[5];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[6], buf1[7], buf0[6],
+                        buf0[7], bit);
+
+    // stage 7
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[0];
+    buf1[1] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[4]);
+    buf1[2] = buf0[6];
+    buf1[3] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[2]);
+    buf1[4] = buf0[3];
+    buf1[5] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[7]);
+    buf1[6] = buf0[5];
+    buf1[7] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[1]);
+
+    output[0 * col_num + col] = buf1[0];
+    output[1 * col_num + col] = buf1[1];
+    output[2 * col_num + col] = buf1[2];
+    output[3 * col_num + col] = buf1[3];
+    output[4 * col_num + col] = buf1[4];
+    output[5 * col_num + col] = buf1[5];
+    output[6 * col_num + col] = buf1[6];
+    output[7 * col_num + col] = buf1[7];
+  }
+}
+
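+// 16-point forward ADST; structure mirrors the 8-point version above.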
+void vp10_fadst16_new_sse4_1(const __m128i *input, __m128i *output,
+                             const int8_t *cos_bit, const int8_t *stage_range) {
+  const int txfm_size = 16;
+  const int num_per_128 = 4;
+  const int32_t *cospi;
+  __m128i buf0[16];
+  __m128i buf1[16];
+  int col_num = txfm_size / num_per_128;
+  int bit;
+  int col;
+  (void)stage_range;
+  for (col = 0; col < col_num; col++) {
+    // stage 0
+    int32_t stage_idx = 0;
+    buf0[0] = input[0 * col_num + col];
+    buf0[1] = input[1 * col_num + col];
+    buf0[2] = input[2 * col_num + col];
+    buf0[3] = input[3 * col_num + col];
+    buf0[4] = input[4 * col_num + col];
+    buf0[5] = input[5 * col_num + col];
+    buf0[6] = input[6 * col_num + col];
+    buf0[7] = input[7 * col_num + col];
+    buf0[8] = input[8 * col_num + col];
+    buf0[9] = input[9 * col_num + col];
+    buf0[10] = input[10 * col_num + col];
+    buf0[11] = input[11 * col_num + col];
+    buf0[12] = input[12 * col_num + col];
+    buf0[13] = input[13 * col_num + col];
+    buf0[14] = input[14 * col_num + col];
+    buf0[15] = input[15 * col_num + col];
+
+    // stage 1
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[15];
+    buf1[1] = buf0[0];
+    buf1[2] = buf0[13];
+    buf1[3] = buf0[2];
+    buf1[4] = buf0[11];
+    buf1[5] = buf0[4];
+    buf1[6] = buf0[9];
+    buf1[7] = buf0[6];
+    buf1[8] = buf0[7];
+    buf1[9] = buf0[8];
+    buf1[10] = buf0[5];
+    buf1[11] = buf0[10];
+    buf1[12] = buf0[3];
+    buf1[13] = buf0[12];
+    buf1[14] = buf0[1];
+    buf1[15] = buf0[14];
+
+    // stage 2
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    btf_32_sse4_1_type0(cospi[2], cospi[62], buf1[0], buf1[1], buf0[0], buf0[1],
+                        bit);
+    btf_32_sse4_1_type0(cospi[10], cospi[54], buf1[2], buf1[3], buf0[2],
+                        buf0[3], bit);
+    btf_32_sse4_1_type0(cospi[18], cospi[46], buf1[4], buf1[5], buf0[4],
+                        buf0[5], bit);
+    btf_32_sse4_1_type0(cospi[26], cospi[38], buf1[6], buf1[7], buf0[6],
+                        buf0[7], bit);
+    btf_32_sse4_1_type0(cospi[34], cospi[30], buf1[8], buf1[9], buf0[8],
+                        buf0[9], bit);
+    btf_32_sse4_1_type0(cospi[42], cospi[22], buf1[10], buf1[11], buf0[10],
+                        buf0[11], bit);
+    btf_32_sse4_1_type0(cospi[50], cospi[14], buf1[12], buf1[13], buf0[12],
+                        buf0[13], bit);
+    btf_32_sse4_1_type0(cospi[58], cospi[6], buf1[14], buf1[15], buf0[14],
+                        buf0[15], bit);
+
+    // stage 3
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[8]);
+    buf1[8] = _mm_sub_epi32(buf0[0], buf0[8]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[9]);
+    buf1[9] = _mm_sub_epi32(buf0[1], buf0[9]);
+    buf1[2] = _mm_add_epi32(buf0[2], buf0[10]);
+    buf1[10] = _mm_sub_epi32(buf0[2], buf0[10]);
+    buf1[3] = _mm_add_epi32(buf0[3], buf0[11]);
+    buf1[11] = _mm_sub_epi32(buf0[3], buf0[11]);
+    buf1[4] = _mm_add_epi32(buf0[4], buf0[12]);
+    buf1[12] = _mm_sub_epi32(buf0[4], buf0[12]);
+    buf1[5] = _mm_add_epi32(buf0[5], buf0[13]);
+    buf1[13] = _mm_sub_epi32(buf0[5], buf0[13]);
+    buf1[6] = _mm_add_epi32(buf0[6], buf0[14]);
+    buf1[14] = _mm_sub_epi32(buf0[6], buf0[14]);
+    buf1[7] = _mm_add_epi32(buf0[7], buf0[15]);
+    buf1[15] = _mm_sub_epi32(buf0[7], buf0[15]);
+
+    // stage 4
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    buf0[2] = buf1[2];
+    buf0[3] = buf1[3];
+    buf0[4] = buf1[4];
+    buf0[5] = buf1[5];
+    buf0[6] = buf1[6];
+    buf0[7] = buf1[7];
+    btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[8], buf1[9], buf0[8], buf0[9],
+                        bit);
+    btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[10], buf1[11], buf0[10],
+                        buf0[11], bit);
+    btf_32_sse4_1_type0(-cospi[56], cospi[8], buf1[12], buf1[13], buf0[12],
+                        buf0[13], bit);
+    btf_32_sse4_1_type0(-cospi[24], cospi[40], buf1[14], buf1[15], buf0[14],
+                        buf0[15], bit);
+
+    // stage 5
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[4]);
+    buf1[4] = _mm_sub_epi32(buf0[0], buf0[4]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[5]);
+    buf1[5] = _mm_sub_epi32(buf0[1], buf0[5]);
+    buf1[2] = _mm_add_epi32(buf0[2], buf0[6]);
+    buf1[6] = _mm_sub_epi32(buf0[2], buf0[6]);
+    buf1[3] = _mm_add_epi32(buf0[3], buf0[7]);
+    buf1[7] = _mm_sub_epi32(buf0[3], buf0[7]);
+    buf1[8] = _mm_add_epi32(buf0[8], buf0[12]);
+    buf1[12] = _mm_sub_epi32(buf0[8], buf0[12]);
+    buf1[9] = _mm_add_epi32(buf0[9], buf0[13]);
+    buf1[13] = _mm_sub_epi32(buf0[9], buf0[13]);
+    buf1[10] = _mm_add_epi32(buf0[10], buf0[14]);
+    buf1[14] = _mm_sub_epi32(buf0[10], buf0[14]);
+    buf1[11] = _mm_add_epi32(buf0[11], buf0[15]);
+    buf1[15] = _mm_sub_epi32(buf0[11], buf0[15]);
+
+    // stage 6
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    buf0[2] = buf1[2];
+    buf0[3] = buf1[3];
+    btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[4], buf1[5], buf0[4],
+                        buf0[5], bit);
+    btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[6], buf1[7], buf0[6],
+                        buf0[7], bit);
+    buf0[8] = buf1[8];
+    buf0[9] = buf1[9];
+    buf0[10] = buf1[10];
+    buf0[11] = buf1[11];
+    btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[12], buf1[13], buf0[12],
+                        buf0[13], bit);
+    btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[14], buf1[15], buf0[14],
+                        buf0[15], bit);
+
+    // stage 7
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
+    buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
+    buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
+    buf1[4] = _mm_add_epi32(buf0[4], buf0[6]);
+    buf1[6] = _mm_sub_epi32(buf0[4], buf0[6]);
+    buf1[5] = _mm_add_epi32(buf0[5], buf0[7]);
+    buf1[7] = _mm_sub_epi32(buf0[5], buf0[7]);
+    buf1[8] = _mm_add_epi32(buf0[8], buf0[10]);
+    buf1[10] = _mm_sub_epi32(buf0[8], buf0[10]);
+    buf1[9] = _mm_add_epi32(buf0[9], buf0[11]);
+    buf1[11] = _mm_sub_epi32(buf0[9], buf0[11]);
+    buf1[12] = _mm_add_epi32(buf0[12], buf0[14]);
+    buf1[14] = _mm_sub_epi32(buf0[12], buf0[14]);
+    buf1[13] = _mm_add_epi32(buf0[13], buf0[15]);
+    buf1[15] = _mm_sub_epi32(buf0[13], buf0[15]);
+
+    // stage 8
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
+                        buf0[3], bit);
+    buf0[4] = buf1[4];
+    buf0[5] = buf1[5];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[6], buf1[7], buf0[6],
+                        buf0[7], bit);
+    buf0[8] = buf1[8];
+    buf0[9] = buf1[9];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[10], buf1[11], buf0[10],
+                        buf0[11], bit);
+    buf0[12] = buf1[12];
+    buf0[13] = buf1[13];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[14], buf1[15], buf0[14],
+                        buf0[15], bit);
+
+    // stage 9
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[0];
+    buf1[1] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[8]);
+    buf1[2] = buf0[12];
+    buf1[3] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[4]);
+    buf1[4] = buf0[6];
+    buf1[5] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[14]);
+    buf1[6] = buf0[10];
+    buf1[7] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[2]);
+    buf1[8] = buf0[3];
+    buf1[9] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[11]);
+    buf1[10] = buf0[15];
+    buf1[11] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[7]);
+    buf1[12] = buf0[5];
+    buf1[13] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[13]);
+    buf1[14] = buf0[9];
+    buf1[15] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[1]);
+
+    output[0 * col_num + col] = buf1[0];
+    output[1 * col_num + col] = buf1[1];
+    output[2 * col_num + col] = buf1[2];
+    output[3 * col_num + col] = buf1[3];
+    output[4 * col_num + col] = buf1[4];
+    output[5 * col_num + col] = buf1[5];
+    output[6 * col_num + col] = buf1[6];
+    output[7 * col_num + col] = buf1[7];
+    output[8 * col_num + col] = buf1[8];
+    output[9 * col_num + col] = buf1[9];
+    output[10 * col_num + col] = buf1[10];
+    output[11 * col_num + col] = buf1[11];
+    output[12 * col_num + col] = buf1[12];
+    output[13 * col_num + col] = buf1[13];
+    output[14 * col_num + col] = buf1[14];
+    output[15 * col_num + col] = buf1[15];
+  }
+}
+
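+// 32-point forward ADST; structure mirrors the shorter ADST kernels above.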
+void vp10_fadst32_new_sse4_1(const __m128i *input, __m128i *output,
+                             const int8_t *cos_bit, const int8_t *stage_range) {
+  const int txfm_size = 32;
+  const int num_per_128 = 4;
+  const int32_t *cospi;
+  __m128i buf0[32];
+  __m128i buf1[32];
+  int col_num = txfm_size / num_per_128;
+  int bit;
+  int col;
+  (void)stage_range;
+  for (col = 0; col < col_num; col++) {
+    // stage 0
+    int32_t stage_idx = 0;
+    buf0[0] = input[0 * col_num + col];
+    buf0[1] = input[1 * col_num + col];
+    buf0[2] = input[2 * col_num + col];
+    buf0[3] = input[3 * col_num + col];
+    buf0[4] = input[4 * col_num + col];
+    buf0[5] = input[5 * col_num + col];
+    buf0[6] = input[6 * col_num + col];
+    buf0[7] = input[7 * col_num + col];
+    buf0[8] = input[8 * col_num + col];
+    buf0[9] = input[9 * col_num + col];
+    buf0[10] = input[10 * col_num + col];
+    buf0[11] = input[11 * col_num + col];
+    buf0[12] = input[12 * col_num + col];
+    buf0[13] = input[13 * col_num + col];
+    buf0[14] = input[14 * col_num + col];
+    buf0[15] = input[15 * col_num + col];
+    buf0[16] = input[16 * col_num + col];
+    buf0[17] = input[17 * col_num + col];
+    buf0[18] = input[18 * col_num + col];
+    buf0[19] = input[19 * col_num + col];
+    buf0[20] = input[20 * col_num + col];
+    buf0[21] = input[21 * col_num + col];
+    buf0[22] = input[22 * col_num + col];
+    buf0[23] = input[23 * col_num + col];
+    buf0[24] = input[24 * col_num + col];
+    buf0[25] = input[25 * col_num + col];
+    buf0[26] = input[26 * col_num + col];
+    buf0[27] = input[27 * col_num + col];
+    buf0[28] = input[28 * col_num + col];
+    buf0[29] = input[29 * col_num + col];
+    buf0[30] = input[30 * col_num + col];
+    buf0[31] = input[31 * col_num + col];
+
+    // stage 1
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[31];
+    buf1[1] = buf0[0];
+    buf1[2] = buf0[29];
+    buf1[3] = buf0[2];
+    buf1[4] = buf0[27];
+    buf1[5] = buf0[4];
+    buf1[6] = buf0[25];
+    buf1[7] = buf0[6];
+    buf1[8] = buf0[23];
+    buf1[9] = buf0[8];
+    buf1[10] = buf0[21];
+    buf1[11] = buf0[10];
+    buf1[12] = buf0[19];
+    buf1[13] = buf0[12];
+    buf1[14] = buf0[17];
+    buf1[15] = buf0[14];
+    buf1[16] = buf0[15];
+    buf1[17] = buf0[16];
+    buf1[18] = buf0[13];
+    buf1[19] = buf0[18];
+    buf1[20] = buf0[11];
+    buf1[21] = buf0[20];
+    buf1[22] = buf0[9];
+    buf1[23] = buf0[22];
+    buf1[24] = buf0[7];
+    buf1[25] = buf0[24];
+    buf1[26] = buf0[5];
+    buf1[27] = buf0[26];
+    buf1[28] = buf0[3];
+    buf1[29] = buf0[28];
+    buf1[30] = buf0[1];
+    buf1[31] = buf0[30];
+
+    // stage 2
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    btf_32_sse4_1_type0(cospi[1], cospi[63], buf1[0], buf1[1], buf0[0], buf0[1],
+                        bit);
+    btf_32_sse4_1_type0(cospi[5], cospi[59], buf1[2], buf1[3], buf0[2], buf0[3],
+                        bit);
+    btf_32_sse4_1_type0(cospi[9], cospi[55], buf1[4], buf1[5], buf0[4], buf0[5],
+                        bit);
+    btf_32_sse4_1_type0(cospi[13], cospi[51], buf1[6], buf1[7], buf0[6],
+                        buf0[7], bit);
+    btf_32_sse4_1_type0(cospi[17], cospi[47], buf1[8], buf1[9], buf0[8],
+                        buf0[9], bit);
+    btf_32_sse4_1_type0(cospi[21], cospi[43], buf1[10], buf1[11], buf0[10],
+                        buf0[11], bit);
+    btf_32_sse4_1_type0(cospi[25], cospi[39], buf1[12], buf1[13], buf0[12],
+                        buf0[13], bit);
+    btf_32_sse4_1_type0(cospi[29], cospi[35], buf1[14], buf1[15], buf0[14],
+                        buf0[15], bit);
+    btf_32_sse4_1_type0(cospi[33], cospi[31], buf1[16], buf1[17], buf0[16],
+                        buf0[17], bit);
+    btf_32_sse4_1_type0(cospi[37], cospi[27], buf1[18], buf1[19], buf0[18],
+                        buf0[19], bit);
+    btf_32_sse4_1_type0(cospi[41], cospi[23], buf1[20], buf1[21], buf0[20],
+                        buf0[21], bit);
+    btf_32_sse4_1_type0(cospi[45], cospi[19], buf1[22], buf1[23], buf0[22],
+                        buf0[23], bit);
+    btf_32_sse4_1_type0(cospi[49], cospi[15], buf1[24], buf1[25], buf0[24],
+                        buf0[25], bit);
+    btf_32_sse4_1_type0(cospi[53], cospi[11], buf1[26], buf1[27], buf0[26],
+                        buf0[27], bit);
+    btf_32_sse4_1_type0(cospi[57], cospi[7], buf1[28], buf1[29], buf0[28],
+                        buf0[29], bit);
+    btf_32_sse4_1_type0(cospi[61], cospi[3], buf1[30], buf1[31], buf0[30],
+                        buf0[31], bit);
+
+    // stage 3
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[16]);
+    buf1[16] = _mm_sub_epi32(buf0[0], buf0[16]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[17]);
+    buf1[17] = _mm_sub_epi32(buf0[1], buf0[17]);
+    buf1[2] = _mm_add_epi32(buf0[2], buf0[18]);
+    buf1[18] = _mm_sub_epi32(buf0[2], buf0[18]);
+    buf1[3] = _mm_add_epi32(buf0[3], buf0[19]);
+    buf1[19] = _mm_sub_epi32(buf0[3], buf0[19]);
+    buf1[4] = _mm_add_epi32(buf0[4], buf0[20]);
+    buf1[20] = _mm_sub_epi32(buf0[4], buf0[20]);
+    buf1[5] = _mm_add_epi32(buf0[5], buf0[21]);
+    buf1[21] = _mm_sub_epi32(buf0[5], buf0[21]);
+    buf1[6] = _mm_add_epi32(buf0[6], buf0[22]);
+    buf1[22] = _mm_sub_epi32(buf0[6], buf0[22]);
+    buf1[7] = _mm_add_epi32(buf0[7], buf0[23]);
+    buf1[23] = _mm_sub_epi32(buf0[7], buf0[23]);
+    buf1[8] = _mm_add_epi32(buf0[8], buf0[24]);
+    buf1[24] = _mm_sub_epi32(buf0[8], buf0[24]);
+    buf1[9] = _mm_add_epi32(buf0[9], buf0[25]);
+    buf1[25] = _mm_sub_epi32(buf0[9], buf0[25]);
+    buf1[10] = _mm_add_epi32(buf0[10], buf0[26]);
+    buf1[26] = _mm_sub_epi32(buf0[10], buf0[26]);
+    buf1[11] = _mm_add_epi32(buf0[11], buf0[27]);
+    buf1[27] = _mm_sub_epi32(buf0[11], buf0[27]);
+    buf1[12] = _mm_add_epi32(buf0[12], buf0[28]);
+    buf1[28] = _mm_sub_epi32(buf0[12], buf0[28]);
+    buf1[13] = _mm_add_epi32(buf0[13], buf0[29]);
+    buf1[29] = _mm_sub_epi32(buf0[13], buf0[29]);
+    buf1[14] = _mm_add_epi32(buf0[14], buf0[30]);
+    buf1[30] = _mm_sub_epi32(buf0[14], buf0[30]);
+    buf1[15] = _mm_add_epi32(buf0[15], buf0[31]);
+    buf1[31] = _mm_sub_epi32(buf0[15], buf0[31]);
+
+    // stage 4
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    buf0[2] = buf1[2];
+    buf0[3] = buf1[3];
+    buf0[4] = buf1[4];
+    buf0[5] = buf1[5];
+    buf0[6] = buf1[6];
+    buf0[7] = buf1[7];
+    buf0[8] = buf1[8];
+    buf0[9] = buf1[9];
+    buf0[10] = buf1[10];
+    buf0[11] = buf1[11];
+    buf0[12] = buf1[12];
+    buf0[13] = buf1[13];
+    buf0[14] = buf1[14];
+    buf0[15] = buf1[15];
+    btf_32_sse4_1_type0(cospi[4], cospi[60], buf1[16], buf1[17], buf0[16],
+                        buf0[17], bit);
+    btf_32_sse4_1_type0(cospi[20], cospi[44], buf1[18], buf1[19], buf0[18],
+                        buf0[19], bit);
+    btf_32_sse4_1_type0(cospi[36], cospi[28], buf1[20], buf1[21], buf0[20],
+                        buf0[21], bit);
+    btf_32_sse4_1_type0(cospi[52], cospi[12], buf1[22], buf1[23], buf0[22],
+                        buf0[23], bit);
+    btf_32_sse4_1_type0(-cospi[60], cospi[4], buf1[24], buf1[25], buf0[24],
+                        buf0[25], bit);
+    btf_32_sse4_1_type0(-cospi[44], cospi[20], buf1[26], buf1[27], buf0[26],
+                        buf0[27], bit);
+    btf_32_sse4_1_type0(-cospi[28], cospi[36], buf1[28], buf1[29], buf0[28],
+                        buf0[29], bit);
+    btf_32_sse4_1_type0(-cospi[12], cospi[52], buf1[30], buf1[31], buf0[30],
+                        buf0[31], bit);
+
+    // stage 5
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[8]);
+    buf1[8] = _mm_sub_epi32(buf0[0], buf0[8]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[9]);
+    buf1[9] = _mm_sub_epi32(buf0[1], buf0[9]);
+    buf1[2] = _mm_add_epi32(buf0[2], buf0[10]);
+    buf1[10] = _mm_sub_epi32(buf0[2], buf0[10]);
+    buf1[3] = _mm_add_epi32(buf0[3], buf0[11]);
+    buf1[11] = _mm_sub_epi32(buf0[3], buf0[11]);
+    buf1[4] = _mm_add_epi32(buf0[4], buf0[12]);
+    buf1[12] = _mm_sub_epi32(buf0[4], buf0[12]);
+    buf1[5] = _mm_add_epi32(buf0[5], buf0[13]);
+    buf1[13] = _mm_sub_epi32(buf0[5], buf0[13]);
+    buf1[6] = _mm_add_epi32(buf0[6], buf0[14]);
+    buf1[14] = _mm_sub_epi32(buf0[6], buf0[14]);
+    buf1[7] = _mm_add_epi32(buf0[7], buf0[15]);
+    buf1[15] = _mm_sub_epi32(buf0[7], buf0[15]);
+    buf1[16] = _mm_add_epi32(buf0[16], buf0[24]);
+    buf1[24] = _mm_sub_epi32(buf0[16], buf0[24]);
+    buf1[17] = _mm_add_epi32(buf0[17], buf0[25]);
+    buf1[25] = _mm_sub_epi32(buf0[17], buf0[25]);
+    buf1[18] = _mm_add_epi32(buf0[18], buf0[26]);
+    buf1[26] = _mm_sub_epi32(buf0[18], buf0[26]);
+    buf1[19] = _mm_add_epi32(buf0[19], buf0[27]);
+    buf1[27] = _mm_sub_epi32(buf0[19], buf0[27]);
+    buf1[20] = _mm_add_epi32(buf0[20], buf0[28]);
+    buf1[28] = _mm_sub_epi32(buf0[20], buf0[28]);
+    buf1[21] = _mm_add_epi32(buf0[21], buf0[29]);
+    buf1[29] = _mm_sub_epi32(buf0[21], buf0[29]);
+    buf1[22] = _mm_add_epi32(buf0[22], buf0[30]);
+    buf1[30] = _mm_sub_epi32(buf0[22], buf0[30]);
+    buf1[23] = _mm_add_epi32(buf0[23], buf0[31]);
+    buf1[31] = _mm_sub_epi32(buf0[23], buf0[31]);
+
+    // stage 6
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    buf0[2] = buf1[2];
+    buf0[3] = buf1[3];
+    buf0[4] = buf1[4];
+    buf0[5] = buf1[5];
+    buf0[6] = buf1[6];
+    buf0[7] = buf1[7];
+    btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[8], buf1[9], buf0[8], buf0[9],
+                        bit);
+    btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[10], buf1[11], buf0[10],
+                        buf0[11], bit);
+    btf_32_sse4_1_type0(-cospi[56], cospi[8], buf1[12], buf1[13], buf0[12],
+                        buf0[13], bit);
+    btf_32_sse4_1_type0(-cospi[24], cospi[40], buf1[14], buf1[15], buf0[14],
+                        buf0[15], bit);
+    buf0[16] = buf1[16];
+    buf0[17] = buf1[17];
+    buf0[18] = buf1[18];
+    buf0[19] = buf1[19];
+    buf0[20] = buf1[20];
+    buf0[21] = buf1[21];
+    buf0[22] = buf1[22];
+    buf0[23] = buf1[23];
+    btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[24], buf1[25], buf0[24],
+                        buf0[25], bit);
+    btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[26], buf1[27], buf0[26],
+                        buf0[27], bit);
+    btf_32_sse4_1_type0(-cospi[56], cospi[8], buf1[28], buf1[29], buf0[28],
+                        buf0[29], bit);
+    btf_32_sse4_1_type0(-cospi[24], cospi[40], buf1[30], buf1[31], buf0[30],
+                        buf0[31], bit);
+
+    // stage 7
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[4]);
+    buf1[4] = _mm_sub_epi32(buf0[0], buf0[4]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[5]);
+    buf1[5] = _mm_sub_epi32(buf0[1], buf0[5]);
+    buf1[2] = _mm_add_epi32(buf0[2], buf0[6]);
+    buf1[6] = _mm_sub_epi32(buf0[2], buf0[6]);
+    buf1[3] = _mm_add_epi32(buf0[3], buf0[7]);
+    buf1[7] = _mm_sub_epi32(buf0[3], buf0[7]);
+    buf1[8] = _mm_add_epi32(buf0[8], buf0[12]);
+    buf1[12] = _mm_sub_epi32(buf0[8], buf0[12]);
+    buf1[9] = _mm_add_epi32(buf0[9], buf0[13]);
+    buf1[13] = _mm_sub_epi32(buf0[9], buf0[13]);
+    buf1[10] = _mm_add_epi32(buf0[10], buf0[14]);
+    buf1[14] = _mm_sub_epi32(buf0[10], buf0[14]);
+    buf1[11] = _mm_add_epi32(buf0[11], buf0[15]);
+    buf1[15] = _mm_sub_epi32(buf0[11], buf0[15]);
+    buf1[16] = _mm_add_epi32(buf0[16], buf0[20]);
+    buf1[20] = _mm_sub_epi32(buf0[16], buf0[20]);
+    buf1[17] = _mm_add_epi32(buf0[17], buf0[21]);
+    buf1[21] = _mm_sub_epi32(buf0[17], buf0[21]);
+    buf1[18] = _mm_add_epi32(buf0[18], buf0[22]);
+    buf1[22] = _mm_sub_epi32(buf0[18], buf0[22]);
+    buf1[19] = _mm_add_epi32(buf0[19], buf0[23]);
+    buf1[23] = _mm_sub_epi32(buf0[19], buf0[23]);
+    buf1[24] = _mm_add_epi32(buf0[24], buf0[28]);
+    buf1[28] = _mm_sub_epi32(buf0[24], buf0[28]);
+    buf1[25] = _mm_add_epi32(buf0[25], buf0[29]);
+    buf1[29] = _mm_sub_epi32(buf0[25], buf0[29]);
+    buf1[26] = _mm_add_epi32(buf0[26], buf0[30]);
+    buf1[30] = _mm_sub_epi32(buf0[26], buf0[30]);
+    buf1[27] = _mm_add_epi32(buf0[27], buf0[31]);
+    buf1[31] = _mm_sub_epi32(buf0[27], buf0[31]);
+
+    // stage 8
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    buf0[2] = buf1[2];
+    buf0[3] = buf1[3];
+    btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[4], buf1[5], buf0[4],
+                        buf0[5], bit);
+    btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[6], buf1[7], buf0[6],
+                        buf0[7], bit);
+    buf0[8] = buf1[8];
+    buf0[9] = buf1[9];
+    buf0[10] = buf1[10];
+    buf0[11] = buf1[11];
+    btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[12], buf1[13], buf0[12],
+                        buf0[13], bit);
+    btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[14], buf1[15], buf0[14],
+                        buf0[15], bit);
+    buf0[16] = buf1[16];
+    buf0[17] = buf1[17];
+    buf0[18] = buf1[18];
+    buf0[19] = buf1[19];
+    btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[20], buf1[21], buf0[20],
+                        buf0[21], bit);
+    btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[22], buf1[23], buf0[22],
+                        buf0[23], bit);
+    buf0[24] = buf1[24];
+    buf0[25] = buf1[25];
+    buf0[26] = buf1[26];
+    buf0[27] = buf1[27];
+    btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[28], buf1[29], buf0[28],
+                        buf0[29], bit);
+    btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[30], buf1[31], buf0[30],
+                        buf0[31], bit);
+
+    // stage 9
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
+    buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
+    buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
+    buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
+    buf1[4] = _mm_add_epi32(buf0[4], buf0[6]);
+    buf1[6] = _mm_sub_epi32(buf0[4], buf0[6]);
+    buf1[5] = _mm_add_epi32(buf0[5], buf0[7]);
+    buf1[7] = _mm_sub_epi32(buf0[5], buf0[7]);
+    buf1[8] = _mm_add_epi32(buf0[8], buf0[10]);
+    buf1[10] = _mm_sub_epi32(buf0[8], buf0[10]);
+    buf1[9] = _mm_add_epi32(buf0[9], buf0[11]);
+    buf1[11] = _mm_sub_epi32(buf0[9], buf0[11]);
+    buf1[12] = _mm_add_epi32(buf0[12], buf0[14]);
+    buf1[14] = _mm_sub_epi32(buf0[12], buf0[14]);
+    buf1[13] = _mm_add_epi32(buf0[13], buf0[15]);
+    buf1[15] = _mm_sub_epi32(buf0[13], buf0[15]);
+    buf1[16] = _mm_add_epi32(buf0[16], buf0[18]);
+    buf1[18] = _mm_sub_epi32(buf0[16], buf0[18]);
+    buf1[17] = _mm_add_epi32(buf0[17], buf0[19]);
+    buf1[19] = _mm_sub_epi32(buf0[17], buf0[19]);
+    buf1[20] = _mm_add_epi32(buf0[20], buf0[22]);
+    buf1[22] = _mm_sub_epi32(buf0[20], buf0[22]);
+    buf1[21] = _mm_add_epi32(buf0[21], buf0[23]);
+    buf1[23] = _mm_sub_epi32(buf0[21], buf0[23]);
+    buf1[24] = _mm_add_epi32(buf0[24], buf0[26]);
+    buf1[26] = _mm_sub_epi32(buf0[24], buf0[26]);
+    buf1[25] = _mm_add_epi32(buf0[25], buf0[27]);
+    buf1[27] = _mm_sub_epi32(buf0[25], buf0[27]);
+    buf1[28] = _mm_add_epi32(buf0[28], buf0[30]);
+    buf1[30] = _mm_sub_epi32(buf0[28], buf0[30]);
+    buf1[29] = _mm_add_epi32(buf0[29], buf0[31]);
+    buf1[31] = _mm_sub_epi32(buf0[29], buf0[31]);
+
+    // stage 10
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf0[0] = buf1[0];
+    buf0[1] = buf1[1];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
+                        buf0[3], bit);
+    buf0[4] = buf1[4];
+    buf0[5] = buf1[5];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[6], buf1[7], buf0[6],
+                        buf0[7], bit);
+    buf0[8] = buf1[8];
+    buf0[9] = buf1[9];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[10], buf1[11], buf0[10],
+                        buf0[11], bit);
+    buf0[12] = buf1[12];
+    buf0[13] = buf1[13];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[14], buf1[15], buf0[14],
+                        buf0[15], bit);
+    buf0[16] = buf1[16];
+    buf0[17] = buf1[17];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[18], buf1[19], buf0[18],
+                        buf0[19], bit);
+    buf0[20] = buf1[20];
+    buf0[21] = buf1[21];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[22], buf1[23], buf0[22],
+                        buf0[23], bit);
+    buf0[24] = buf1[24];
+    buf0[25] = buf1[25];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[26], buf1[27], buf0[26],
+                        buf0[27], bit);
+    buf0[28] = buf1[28];
+    buf0[29] = buf1[29];
+    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[30], buf1[31], buf0[30],
+                        buf0[31], bit);
+
+    // stage 11
+    stage_idx++;
+    bit = cos_bit[stage_idx];
+    cospi = cospi_arr[bit - cos_bit_min];
+    buf1[0] = buf0[0];
+    buf1[1] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[16]);
+    buf1[2] = buf0[24];
+    buf1[3] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[8]);
+    buf1[4] = buf0[12];
+    buf1[5] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[28]);
+    buf1[6] = buf0[20];
+    buf1[7] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[4]);
+    buf1[8] = buf0[6];
+    buf1[9] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[22]);
+    buf1[10] = buf0[30];
+    buf1[11] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[14]);
+    buf1[12] = buf0[10];
+    buf1[13] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[26]);
+    buf1[14] = buf0[18];
+    buf1[15] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[2]);
+    buf1[16] = buf0[3];
+    buf1[17] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[19]);
+    buf1[18] = buf0[27];
+    buf1[19] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[11]);
+    buf1[20] = buf0[15];
+    buf1[21] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[31]);
+    buf1[22] = buf0[23];
+    buf1[23] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[7]);
+    buf1[24] = buf0[5];
+    buf1[25] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[21]);
+    buf1[26] = buf0[29];
+    buf1[27] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[13]);
+    buf1[28] = buf0[9];
+    buf1[29] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[25]);
+    buf1[30] = buf0[17];
+    buf1[31] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[1]);
+
+    output[0 * col_num + col] = buf1[0];
+    output[1 * col_num + col] = buf1[1];
+    output[2 * col_num + col] = buf1[2];
+    output[3 * col_num + col] = buf1[3];
+    output[4 * col_num + col] = buf1[4];
+    output[5 * col_num + col] = buf1[5];
+    output[6 * col_num + col] = buf1[6];
+    output[7 * col_num + col] = buf1[7];
+    output[8 * col_num + col] = buf1[8];
+    output[9 * col_num + col] = buf1[9];
+    output[10 * col_num + col] = buf1[10];
+    output[11 * col_num + col] = buf1[11];
+    output[12 * col_num + col] = buf1[12];
+    output[13 * col_num + col] = buf1[13];
+    output[14 * col_num + col] = buf1[14];
+    output[15 * col_num + col] = buf1[15];
+    output[16 * col_num + col] = buf1[16];
+    output[17 * col_num + col] = buf1[17];
+    output[18 * col_num + col] = buf1[18];
+    output[19 * col_num + col] = buf1[19];
+    output[20 * col_num + col] = buf1[20];
+    output[21 * col_num + col] = buf1[21];
+    output[22 * col_num + col] = buf1[22];
+    output[23 * col_num + col] = buf1[23];
+    output[24 * col_num + col] = buf1[24];
+    output[25 * col_num + col] = buf1[25];
+    output[26 * col_num + col] = buf1[26];
+    output[27 * col_num + col] = buf1[27];
+    output[28 * col_num + col] = buf1[28];
+    output[29 * col_num + col] = buf1[29];
+    output[30 * col_num + col] = buf1[30];
+    output[31 * col_num + col] = buf1[31];
+  }
+}
diff --git a/av1/common/x86/vp10_fwd_txfm2d_sse4.c b/av1/common/x86/vp10_fwd_txfm2d_sse4.c
new file mode 100644
index 0000000..a59a0c8
--- /dev/null
+++ b/av1/common/x86/vp10_fwd_txfm2d_sse4.c
@@ -0,0 +1,86 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "av1/common/enums.h"
+#include "av1/common/vp10_txfm.h"
+#include "av1/common/x86/vp10_txfm1d_sse4.h"
+
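+// Gather the strided 16-bit input block into a dense, row-major 32-bit
+// array so the SSE4.1 1-D transform stages can operate on packed 4-lane
+// vectors.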
+static INLINE void int16_array_with_stride_to_int32_array_without_stride(
+    const int16_t *input, int stride, int32_t *output, int txfm1d_size) {
+  int r, c;
+  for (r = 0; r < txfm1d_size; r++) {
+    for (c = 0; c < txfm1d_size; c++) {
+      output[r * txfm1d_size + c] = (int32_t)input[r * stride + c];
+    }
+  }
+}
+
+typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output,
+                             const int8_t *cos_bit, const int8_t *stage_range);
+
+static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
+  switch (txfm_type) {
+    case TXFM_TYPE_DCT4: return vp10_fdct4_new_sse4_1;
+    case TXFM_TYPE_DCT8: return vp10_fdct8_new_sse4_1;
+    case TXFM_TYPE_DCT16: return vp10_fdct16_new_sse4_1;
+    case TXFM_TYPE_DCT32: return vp10_fdct32_new_sse4_1;
+    case TXFM_TYPE_ADST4: return vp10_fadst4_new_sse4_1;
+    case TXFM_TYPE_ADST8: return vp10_fadst8_new_sse4_1;
+    case TXFM_TYPE_ADST16: return vp10_fadst16_new_sse4_1;
+    case TXFM_TYPE_ADST32: return vp10_fadst32_new_sse4_1;
+    default: assert(0);
+  }
+  return NULL;
+}
+
+static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
+                                     const int stride, const TXFM_2D_CFG *cfg,
+                                     int32_t *txfm_buf) {
+  const int txfm_size = cfg->txfm_size;
+  const int8_t *shift = cfg->shift;
+  const int8_t *stage_range_col = cfg->stage_range_col;
+  const int8_t *stage_range_row = cfg->stage_range_row;
+  const int8_t *cos_bit_col = cfg->cos_bit_col;
+  const int8_t *cos_bit_row = cfg->cos_bit_row;
+  const TxfmFuncSSE2 txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
+  const TxfmFuncSSE2 txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
+
+  __m128i *buf_128 = (__m128i *)txfm_buf;
+  __m128i *out_128 = (__m128i *)output;
+  int num_per_128 = 4;
+  int txfm2d_size_128 = txfm_size * txfm_size / num_per_128;
+
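+  // The 2D transform is a fixed pipeline: gather the input, pre-shift,
+  // column transform, shift, transpose, row transform, shift, transpose.
+  // buf_128 and out_128 alternate as source and destination at each step,
+  // so no extra scratch buffer is needed.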
+  int16_array_with_stride_to_int32_array_without_stride(input, stride, txfm_buf,
+                                                        txfm_size);
+  round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[0]);
+  txfm_func_col(out_128, buf_128, cos_bit_col, stage_range_col);
+  round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[1]);
+  transpose_32(txfm_size, out_128, buf_128);
+  txfm_func_row(buf_128, out_128, cos_bit_row, stage_range_row);
+  round_shift_array_32_sse4_1(out_128, buf_128, txfm2d_size_128, -shift[2]);
+  transpose_32(txfm_size, buf_128, out_128);
+}
+
+void vp10_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
+                                  int stride, int tx_type, int bd) {
+  int32_t txfm_buf[1024];
+  TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_32X32);
+  (void)bd;
+  fwd_txfm2d_sse4_1(input, output, stride, cfg.cfg, txfm_buf);
+}
+
+void vp10_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
+                                  int stride, int tx_type, int bd) {
+  int32_t txfm_buf[4096];
+  TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_64x64_cfg(tx_type);
+  (void)bd;
+  fwd_txfm2d_sse4_1(input, output, stride, cfg.cfg, txfm_buf);
+}
diff --git a/av1/common/x86/vp10_fwd_txfm_impl_sse2.h b/av1/common/x86/vp10_fwd_txfm_impl_sse2.h
new file mode 100644
index 0000000..9bb8abc
--- /dev/null
+++ b/av1/common/x86/vp10_fwd_txfm_impl_sse2.h
@@ -0,0 +1,1013 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h>  // SSE2
+
+#include "./vpx_dsp_rtcd.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/x86/fwd_txfm_sse2.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+#include "aom_ports/mem.h"
+
+// TODO(jingning) The high bit-depth functions need rework for performance.
+// After we properly fix the high bit-depth function implementations, this
+// file's dependency should be substantially simplified.
+#if DCT_HIGH_BIT_DEPTH
+#define ADD_EPI16 _mm_adds_epi16
+#define SUB_EPI16 _mm_subs_epi16
+
+#else
+#define ADD_EPI16 _mm_add_epi16
+#define SUB_EPI16 _mm_sub_epi16
+#endif
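+// In high bit-depth builds the saturating forms are used so that an
+// intermediate overflow clamps rather than wraps; the check_epi16_overflow_*
+// tests below then detect possible saturation and fall back to the C path.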
+
+void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) {
+  // This 2D transform implements 4 vertical 1D transforms followed
+  // by 4 horizontal 1D transforms.  The multiplies and adds are as given
+  // by Chen, Smith and Fralick ('77).  The commands for moving the data
+  // around have been minimized by hand.
+  // For the purposes of the comments, the 16 inputs are referred to as i0
+  // through iF (in raster order), and the intermediate variables are a0, b0,
+  // c0 through f, corresponding to the in-place computations mapped to input
+  // locations.  The outputs, o0 through oF, are labeled according to their
+  // output locations.
+
+  // Constants
+  // These are the coefficients used for the multiplies.
+  // In the comments, pN means cospi_N_64 = cos(N * pi / 64) and mN means
+  // -cospi_N_64 = -cos(N * pi / 64).
+  const __m128i k__cospi_A =
+      octa_set_epi16(cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64,
+                     cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64);
+  const __m128i k__cospi_B =
+      octa_set_epi16(cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64,
+                     cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64);
+  const __m128i k__cospi_C =
+      octa_set_epi16(cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64,
+                     cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64);
+  const __m128i k__cospi_D =
+      octa_set_epi16(cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64,
+                     cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64);
+  const __m128i k__cospi_E =
+      octa_set_epi16(cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64,
+                     cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64);
+  const __m128i k__cospi_F =
+      octa_set_epi16(cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64,
+                     cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64);
+  const __m128i k__cospi_G =
+      octa_set_epi16(cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64,
+                     -cospi_8_64, -cospi_24_64, -cospi_8_64, -cospi_24_64);
+  const __m128i k__cospi_H =
+      octa_set_epi16(cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64,
+                     -cospi_24_64, cospi_8_64, -cospi_24_64, cospi_8_64);
+
+  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  // This second rounding constant saves doing some extra adds at the end
+  const __m128i k__DCT_CONST_ROUNDING2 =
+      _mm_set1_epi32(DCT_CONST_ROUNDING + (DCT_CONST_ROUNDING << 1));
+  const int DCT_CONST_BITS2 = DCT_CONST_BITS + 2;
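+  // With DCT_CONST_ROUNDING == 1 << (DCT_CONST_BITS - 1), the combined
+  // constant is 3 * DCT_CONST_ROUNDING, so a single add of ROUNDING2 and
+  // shift by DCT_CONST_BITS2 is exactly equivalent to the round-shift by
+  // DCT_CONST_BITS followed by the (v + 1) >> 2 post-condition.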
+  const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);
+  const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
+  __m128i in0, in1;
+#if DCT_HIGH_BIT_DEPTH
+  __m128i cmp0, cmp1;
+  int test, overflow;
+#endif
+
+  // Load inputs.
+  in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
+  in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+  in1 = _mm_unpacklo_epi64(
+      in1, _mm_loadl_epi64((const __m128i *)(input + 2 * stride)));
+  in0 = _mm_unpacklo_epi64(
+      in0, _mm_loadl_epi64((const __m128i *)(input + 3 * stride)));
+// in0 = [i0 i1 i2 i3 iC iD iE iF]
+// in1 = [i4 i5 i6 i7 i8 i9 iA iB]
+#if DCT_HIGH_BIT_DEPTH
+  // Check that the inputs are small enough to use the optimized code.
+  cmp0 = _mm_xor_si128(_mm_cmpgt_epi16(in0, _mm_set1_epi16(0x3ff)),
+                       _mm_cmplt_epi16(in0, _mm_set1_epi16(0xfc00)));
+  cmp1 = _mm_xor_si128(_mm_cmpgt_epi16(in1, _mm_set1_epi16(0x3ff)),
+                       _mm_cmplt_epi16(in1, _mm_set1_epi16(0xfc00)));
+  test = _mm_movemask_epi8(_mm_or_si128(cmp0, cmp1));
+  if (test) {
+    vpx_highbd_fdct4x4_c(input, output, stride);
+    return;
+  }
+#endif  // DCT_HIGH_BIT_DEPTH
+
+  // multiply by 16 to give some extra precision
+  in0 = _mm_slli_epi16(in0, 4);
+  in1 = _mm_slli_epi16(in1, 4);
+  // if (i == 0 && input[0]) input[0] += 1;
+  // add 1 to the upper left pixel if it is non-zero, which helps reduce
+  // the round-trip error
+  {
+    // The mask will only contain whether the first value is zero; all
+    // other comparisons will fail, as something shifted by 4 (above << 4)
+    // can never be equal to one. To increment in the non-zero case, we
+    // add the mask and one for the first element:
+    //   - if zero, mask = -1, v = v - 1 + 1 = v
+    //   - if non-zero, mask = 0, v = v + 0 + 1 = v + 1
+    __m128i mask = _mm_cmpeq_epi16(in0, k__nonzero_bias_a);
+    in0 = _mm_add_epi16(in0, mask);
+    in0 = _mm_add_epi16(in0, k__nonzero_bias_b);
+  }
+  // There are 4 total stages, alternating between an add/subtract stage
+  // and a multiply-and-add stage.
+  {
+    // Stage 1: Add/subtract
+
+    // in0 = [i0 i1 i2 i3 iC iD iE iF]
+    // in1 = [i4 i5 i6 i7 i8 i9 iA iB]
+    const __m128i r0 = _mm_unpacklo_epi16(in0, in1);
+    const __m128i r1 = _mm_unpackhi_epi16(in0, in1);
+    // r0 = [i0 i4 i1 i5 i2 i6 i3 i7]
+    // r1 = [iC i8 iD i9 iE iA iF iB]
+    const __m128i r2 = _mm_shuffle_epi32(r0, 0xB4);
+    const __m128i r3 = _mm_shuffle_epi32(r1, 0xB4);
+    // r2 = [i0 i4 i1 i5 i3 i7 i2 i6]
+    // r3 = [iC i8 iD i9 iF iB iE iA]
+
+    const __m128i t0 = _mm_add_epi16(r2, r3);
+    const __m128i t1 = _mm_sub_epi16(r2, r3);
+    // t0 = [a0 a4 a1 a5 a3 a7 a2 a6]
+    // t1 = [aC a8 aD a9 aF aB aE aA]
+
+    // Stage 2: multiply by constants (which gets us into 32 bits).
+    // The constants needed here are:
+    // k__cospi_A = [p16 p16 p16 p16 p16 m16 p16 m16]
+    // k__cospi_B = [p16 m16 p16 m16 p16 p16 p16 p16]
+    // k__cospi_C = [p08 p24 p08 p24 p24 m08 p24 m08]
+    // k__cospi_D = [p24 m08 p24 m08 p08 p24 p08 p24]
+    const __m128i u0 = _mm_madd_epi16(t0, k__cospi_A);
+    const __m128i u2 = _mm_madd_epi16(t0, k__cospi_B);
+    const __m128i u1 = _mm_madd_epi16(t1, k__cospi_C);
+    const __m128i u3 = _mm_madd_epi16(t1, k__cospi_D);
+    // Then add and right-shift to get back to 16-bit range
+    const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+    const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+    const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+    const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+    const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+    const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+    const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+    const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+    // w0 = [b0 b1 b7 b6]
+    // w1 = [b8 b9 bF bE]
+    // w2 = [b4 b5 b3 b2]
+    // w3 = [bC bD bB bA]
+    const __m128i x0 = _mm_packs_epi32(w0, w1);
+    const __m128i x1 = _mm_packs_epi32(w2, w3);
+#if DCT_HIGH_BIT_DEPTH
+    overflow = check_epi16_overflow_x2(&x0, &x1);
+    if (overflow) {
+      vpx_highbd_fdct4x4_c(input, output, stride);
+      return;
+    }
+#endif  // DCT_HIGH_BIT_DEPTH
+    // x0 = [b0 b1 b7 b6 b8 b9 bF bE]
+    // x1 = [b4 b5 b3 b2 bC bD bB bA]
+    in0 = _mm_shuffle_epi32(x0, 0xD8);
+    in1 = _mm_shuffle_epi32(x1, 0x8D);
+    // in0 = [b0 b1 b8 b9 b7 b6 bF bE]
+    // in1 = [b3 b2 bB bA b4 b5 bC bD]
+  }
+  {
+    // The vertical DCTs are finished. Now we do the horizontal DCTs.
+    // Stage 3: Add/subtract
+
+    // t0 = [c0 c1 c8 c9  c4  c5  cC  cD]
+    // t1 = [c3 c2 cB cA -c7 -c6 -cF -cE]
+    const __m128i t0 = ADD_EPI16(in0, in1);
+    const __m128i t1 = SUB_EPI16(in0, in1);
+#if DCT_HIGH_BIT_DEPTH
+    overflow = check_epi16_overflow_x2(&t0, &t1);
+    if (overflow) {
+      vpx_highbd_fdct4x4_c(input, output, stride);
+      return;
+    }
+#endif  // DCT_HIGH_BIT_DEPTH
+
+    // Stage 4: multiply by constants (which gets us into 32 bits).
+    {
+      // The constants needed here are:
+      // k__cospi_E = [p16 p16 p16 p16 p16 p16 p16 p16]
+      // k__cospi_F = [p16 m16 p16 m16 p16 m16 p16 m16]
+      // k__cospi_G = [p08 p24 p08 p24 m08 m24 m08 m24]
+      // k__cospi_H = [p24 m08 p24 m08 m24 p08 m24 p08]
+      const __m128i u0 = _mm_madd_epi16(t0, k__cospi_E);
+      const __m128i u1 = _mm_madd_epi16(t0, k__cospi_F);
+      const __m128i u2 = _mm_madd_epi16(t1, k__cospi_G);
+      const __m128i u3 = _mm_madd_epi16(t1, k__cospi_H);
+      // Then add and right-shift to get back to 16-bit range,
+      // but this combines the final right-shift as well to save operations.
+      // This unusual rounding operation maintains bit-accurate
+      // compatibility with the C version of this function, which has two
+      // rounding steps in a row.
+      const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING2);
+      const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING2);
+      const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING2);
+      const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING2);
+      const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS2);
+      const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS2);
+      const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS2);
+      const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS2);
+      // w0 = [o0 o4 o8 oC]
+      // w1 = [o2 o6 oA oE]
+      // w2 = [o1 o5 o9 oD]
+      // w3 = [o3 o7 oB oF]
+      // remember the o's are numbered according to the correct output location
+      const __m128i x0 = _mm_packs_epi32(w0, w1);
+      const __m128i x1 = _mm_packs_epi32(w2, w3);
+#if DCT_HIGH_BIT_DEPTH
+      overflow = check_epi16_overflow_x2(&x0, &x1);
+      if (overflow) {
+        vpx_highbd_fdct4x4_c(input, output, stride);
+        return;
+      }
+#endif  // DCT_HIGH_BIT_DEPTH
+      {
+        // x0 = [o0 o4 o8 oC o2 o6 oA oE]
+        // x1 = [o1 o5 o9 oD o3 o7 oB oF]
+        const __m128i y0 = _mm_unpacklo_epi16(x0, x1);
+        const __m128i y1 = _mm_unpackhi_epi16(x0, x1);
+        // y0 = [o0 o1 o4 o5 o8 o9 oC oD]
+        // y1 = [o2 o3 o6 o7 oA oB oE oF]
+        in0 = _mm_unpacklo_epi32(y0, y1);
+        // in0 = [o0 o1 o2 o3 o4 o5 o6 o7]
+        in1 = _mm_unpackhi_epi32(y0, y1);
+        // in1 = [o8 o9 oA oB oC oD oE oF]
+      }
+    }
+  }
+  // The post-condition (v + 1) >> 2 is now incorporated into the previous
+  // add and right-shift commands.  Only 2 store instructions are needed
+  // because rows 1 and 3 are stored just after rows 0 and 2.
+  storeu_output(&in0, output + 0 * 4);
+  storeu_output(&in1, output + 2 * 4);
+}
+
+void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
+  int pass;
+  // Constants
+  //    In one case all eight 16-bit lanes hold the same value. In all
+  //    others it's a pair of values that we need to repeat four times,
+  //    which is done by constructing the 32-bit constant for that pair.
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+  const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+  const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
+  const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+  const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
+  const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+#if DCT_HIGH_BIT_DEPTH
+  int overflow;
+#endif
+  // Load input
+  __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+  __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+  __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+  __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+  __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+  __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+  __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+  __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
+  // Pre-condition input (shift by two)
+  in0 = _mm_slli_epi16(in0, 2);
+  in1 = _mm_slli_epi16(in1, 2);
+  in2 = _mm_slli_epi16(in2, 2);
+  in3 = _mm_slli_epi16(in3, 2);
+  in4 = _mm_slli_epi16(in4, 2);
+  in5 = _mm_slli_epi16(in5, 2);
+  in6 = _mm_slli_epi16(in6, 2);
+  in7 = _mm_slli_epi16(in7, 2);
+
+  // We do two passes, first the columns, then the rows. The results of the
+  // first pass are transposed so that the same column code can be reused. The
+  // results of the second pass are also transposed so that the rows (processed
+  // as columns) are put back in row positions.
+  for (pass = 0; pass < 2; pass++) {
+    // To store results of each pass before the transpose.
+    __m128i res0, res1, res2, res3, res4, res5, res6, res7;
+    // Add/subtract
+    const __m128i q0 = ADD_EPI16(in0, in7);
+    const __m128i q1 = ADD_EPI16(in1, in6);
+    const __m128i q2 = ADD_EPI16(in2, in5);
+    const __m128i q3 = ADD_EPI16(in3, in4);
+    const __m128i q4 = SUB_EPI16(in3, in4);
+    const __m128i q5 = SUB_EPI16(in2, in5);
+    const __m128i q6 = SUB_EPI16(in1, in6);
+    const __m128i q7 = SUB_EPI16(in0, in7);
+#if DCT_HIGH_BIT_DEPTH
+    if (pass == 1) {
+      overflow =
+          check_epi16_overflow_x8(&q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7);
+      if (overflow) {
+        vpx_highbd_fdct8x8_c(input, output, stride);
+        return;
+      }
+    }
+#endif  // DCT_HIGH_BIT_DEPTH
+    // Work on first four results
+    {
+      // Add/subtract
+      const __m128i r0 = ADD_EPI16(q0, q3);
+      const __m128i r1 = ADD_EPI16(q1, q2);
+      const __m128i r2 = SUB_EPI16(q1, q2);
+      const __m128i r3 = SUB_EPI16(q0, q3);
+#if DCT_HIGH_BIT_DEPTH
+      overflow = check_epi16_overflow_x4(&r0, &r1, &r2, &r3);
+      if (overflow) {
+        vpx_highbd_fdct8x8_c(input, output, stride);
+        return;
+      }
+#endif  // DCT_HIGH_BIT_DEPTH
+      // Interleave to do the multiply by constants which gets us into 32 bits.
+      {
+        const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
+        const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
+        const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
+        const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
+        const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
+        const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
+        const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
+        const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16);
+        const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
+        const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08);
+        const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
+        const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24);
+        // dct_const_round_shift
+        const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+        const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+        const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+        const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+        const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+        const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+        const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+        const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+        const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+        const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+        const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+        const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+        const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+        const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+        const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+        const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+        // Combine
+        res0 = _mm_packs_epi32(w0, w1);
+        res4 = _mm_packs_epi32(w2, w3);
+        res2 = _mm_packs_epi32(w4, w5);
+        res6 = _mm_packs_epi32(w6, w7);
+#if DCT_HIGH_BIT_DEPTH
+        overflow = check_epi16_overflow_x4(&res0, &res4, &res2, &res6);
+        if (overflow) {
+          vpx_highbd_fdct8x8_c(input, output, stride);
+          return;
+        }
+#endif  // DCT_HIGH_BIT_DEPTH
+      }
+    }
+    // Work on next four results
+    {
+      // Interleave to do the multiply by constants which gets us into 32 bits.
+      const __m128i d0 = _mm_unpacklo_epi16(q6, q5);
+      const __m128i d1 = _mm_unpackhi_epi16(q6, q5);
+      const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16);
+      const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16);
+      const __m128i e2 = _mm_madd_epi16(d0, k__cospi_p16_p16);
+      const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16);
+      // dct_const_round_shift
+      const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING);
+      const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING);
+      const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING);
+      const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING);
+      const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS);
+      const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS);
+      const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS);
+      const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS);
+      // Combine
+      const __m128i r0 = _mm_packs_epi32(s0, s1);
+      const __m128i r1 = _mm_packs_epi32(s2, s3);
+#if DCT_HIGH_BIT_DEPTH
+      overflow = check_epi16_overflow_x2(&r0, &r1);
+      if (overflow) {
+        vpx_highbd_fdct8x8_c(input, output, stride);
+        return;
+      }
+#endif  // DCT_HIGH_BIT_DEPTH
+      {
+        // Add/subtract
+        const __m128i x0 = ADD_EPI16(q4, r0);
+        const __m128i x1 = SUB_EPI16(q4, r0);
+        const __m128i x2 = SUB_EPI16(q7, r1);
+        const __m128i x3 = ADD_EPI16(q7, r1);
+#if DCT_HIGH_BIT_DEPTH
+        overflow = check_epi16_overflow_x4(&x0, &x1, &x2, &x3);
+        if (overflow) {
+          vpx_highbd_fdct8x8_c(input, output, stride);
+          return;
+        }
+#endif  // DCT_HIGH_BIT_DEPTH
+        // Interleave to do the multiply by constants which gets us into 32 bits.
+        {
+          const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
+          const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
+          const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
+          const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
+          const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04);
+          const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04);
+          const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28);
+          const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28);
+          const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20);
+          const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20);
+          const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12);
+          const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12);
+          // dct_const_round_shift
+          const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+          const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+          const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+          const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+          const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+          const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+          const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+          const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+          const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+          const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+          const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+          const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+          const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+          const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+          const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+          const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+          // Combine
+          res1 = _mm_packs_epi32(w0, w1);
+          res7 = _mm_packs_epi32(w2, w3);
+          res5 = _mm_packs_epi32(w4, w5);
+          res3 = _mm_packs_epi32(w6, w7);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x4(&res1, &res7, &res5, &res3);
+          if (overflow) {
+            vpx_highbd_fdct8x8_c(input, output, stride);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+      }
+    }
+    // Transpose the 8x8.
+    {
+      // 00 01 02 03 04 05 06 07
+      // 10 11 12 13 14 15 16 17
+      // 20 21 22 23 24 25 26 27
+      // 30 31 32 33 34 35 36 37
+      // 40 41 42 43 44 45 46 47
+      // 50 51 52 53 54 55 56 57
+      // 60 61 62 63 64 65 66 67
+      // 70 71 72 73 74 75 76 77
+      const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
+      const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);
+      const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1);
+      const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3);
+      const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);
+      const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7);
+      const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5);
+      const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7);
+      // 00 10 01 11 02 12 03 13
+      // 20 30 21 31 22 32 23 33
+      // 04 14 05 15 06 16 07 17
+      // 24 34 25 35 26 36 27 37
+      // 40 50 41 51 42 52 43 53
+      // 60 70 61 71 62 72 63 73
+      // 44 54 45 55 46 56 47 57
+      // 64 74 65 75 66 76 67 77
+      const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+      const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+      const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+      const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+      const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+      const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+      const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+      const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+      // 00 10 20 30 01 11 21 31
+      // 40 50 60 70 41 51 61 71
+      // 02 12 22 32 03 13 23 33
+      // 42 52 62 72 43 53 63 73
+      // 04 14 24 34 05 15 25 35
+      // 44 54 64 74 45 55 65 75
+      // 06 16 26 36 07 17 27 37
+      // 46 56 66 76 47 57 67 77
+      in0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
+      in1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
+      in2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
+      in3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
+      in4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
+      in5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
+      in6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
+      in7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
+      // 00 10 20 30 40 50 60 70
+      // 01 11 21 31 41 51 61 71
+      // 02 12 22 32 42 52 62 72
+      // 03 13 23 33 43 53 63 73
+      // 04 14 24 34 44 54 64 74
+      // 05 15 25 35 45 55 65 75
+      // 06 16 26 36 46 56 66 76
+      // 07 17 27 37 47 57 67 77
+    }
+  }
+  // Post-condition output and store it
+  {
+    // Post-condition (division by two)
+    //    division by two of 16-bit signed numbers, using shifts:
+    //    n / 2 = (n - (n >> 15)) >> 1
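+    //    e.g. n = -5: n >> 15 = -1, so (-5 - (-1)) >> 1 = -4 >> 1 = -2,
+    //    matching C truncating division, where -5 >> 1 alone would give -3.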
+    const __m128i sign_in0 = _mm_srai_epi16(in0, 15);
+    const __m128i sign_in1 = _mm_srai_epi16(in1, 15);
+    const __m128i sign_in2 = _mm_srai_epi16(in2, 15);
+    const __m128i sign_in3 = _mm_srai_epi16(in3, 15);
+    const __m128i sign_in4 = _mm_srai_epi16(in4, 15);
+    const __m128i sign_in5 = _mm_srai_epi16(in5, 15);
+    const __m128i sign_in6 = _mm_srai_epi16(in6, 15);
+    const __m128i sign_in7 = _mm_srai_epi16(in7, 15);
+    in0 = _mm_sub_epi16(in0, sign_in0);
+    in1 = _mm_sub_epi16(in1, sign_in1);
+    in2 = _mm_sub_epi16(in2, sign_in2);
+    in3 = _mm_sub_epi16(in3, sign_in3);
+    in4 = _mm_sub_epi16(in4, sign_in4);
+    in5 = _mm_sub_epi16(in5, sign_in5);
+    in6 = _mm_sub_epi16(in6, sign_in6);
+    in7 = _mm_sub_epi16(in7, sign_in7);
+    in0 = _mm_srai_epi16(in0, 1);
+    in1 = _mm_srai_epi16(in1, 1);
+    in2 = _mm_srai_epi16(in2, 1);
+    in3 = _mm_srai_epi16(in3, 1);
+    in4 = _mm_srai_epi16(in4, 1);
+    in5 = _mm_srai_epi16(in5, 1);
+    in6 = _mm_srai_epi16(in6, 1);
+    in7 = _mm_srai_epi16(in7, 1);
+    // store results
+    store_output(&in0, (output + 0 * 8));
+    store_output(&in1, (output + 1 * 8));
+    store_output(&in2, (output + 2 * 8));
+    store_output(&in3, (output + 3 * 8));
+    store_output(&in4, (output + 4 * 8));
+    store_output(&in5, (output + 5 * 8));
+    store_output(&in6, (output + 6 * 8));
+    store_output(&in7, (output + 7 * 8));
+  }
+}
+
+void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
+  // The 2D transform is done with two passes which are actually pretty
+  // similar. In the first one, we transform the columns and transpose
+  // the results. In the second one, we transform the rows. Since the first
+  // pass left its results transposed, the second pass again processes
+  // columns (which are the original rows) and transposes its results, so
+  // that the data goes back into normal/row positions.
+  int pass;
+  // We need an intermediate buffer between passes.
+  DECLARE_ALIGNED(16, int16_t, intermediate[256]);
+  const int16_t *in = input;
+  int16_t *out0 = intermediate;
+  tran_low_t *out1 = output;
+  // Constants
+  //    In one case all eight 16-bit lanes hold the same value. In all
+  //    others it's a pair of values that we need to repeat four times,
+  //    which is done by constructing the 32-bit constant for that pair.
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+  const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64);
+  const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+  const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
+  const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+  const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
+  const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+  const __m128i k__cospi_p30_p02 = pair_set_epi16(cospi_30_64, cospi_2_64);
+  const __m128i k__cospi_p14_p18 = pair_set_epi16(cospi_14_64, cospi_18_64);
+  const __m128i k__cospi_m02_p30 = pair_set_epi16(-cospi_2_64, cospi_30_64);
+  const __m128i k__cospi_m18_p14 = pair_set_epi16(-cospi_18_64, cospi_14_64);
+  const __m128i k__cospi_p22_p10 = pair_set_epi16(cospi_22_64, cospi_10_64);
+  const __m128i k__cospi_p06_p26 = pair_set_epi16(cospi_6_64, cospi_26_64);
+  const __m128i k__cospi_m10_p22 = pair_set_epi16(-cospi_10_64, cospi_22_64);
+  const __m128i k__cospi_m26_p06 = pair_set_epi16(-cospi_26_64, cospi_6_64);
+  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  const __m128i kOne = _mm_set1_epi16(1);
+  // Do the two transform/transpose passes
+  for (pass = 0; pass < 2; ++pass) {
+    // We process eight columns (transposed rows in second pass) at a time.
+    int column_start;
+#if DCT_HIGH_BIT_DEPTH
+    int overflow;
+#endif
+    for (column_start = 0; column_start < 16; column_start += 8) {
+      __m128i in00, in01, in02, in03, in04, in05, in06, in07;
+      __m128i in08, in09, in10, in11, in12, in13, in14, in15;
+      __m128i input0, input1, input2, input3, input4, input5, input6, input7;
+      __m128i step1_0, step1_1, step1_2, step1_3;
+      __m128i step1_4, step1_5, step1_6, step1_7;
+      __m128i step2_1, step2_2, step2_3, step2_4, step2_5, step2_6;
+      __m128i step3_0, step3_1, step3_2, step3_3;
+      __m128i step3_4, step3_5, step3_6, step3_7;
+      __m128i res00, res01, res02, res03, res04, res05, res06, res07;
+      __m128i res08, res09, res10, res11, res12, res13, res14, res15;
+      // Load and pre-condition input.
+      if (0 == pass) {
+        in00 = _mm_load_si128((const __m128i *)(in + 0 * stride));
+        in01 = _mm_load_si128((const __m128i *)(in + 1 * stride));
+        in02 = _mm_load_si128((const __m128i *)(in + 2 * stride));
+        in03 = _mm_load_si128((const __m128i *)(in + 3 * stride));
+        in04 = _mm_load_si128((const __m128i *)(in + 4 * stride));
+        in05 = _mm_load_si128((const __m128i *)(in + 5 * stride));
+        in06 = _mm_load_si128((const __m128i *)(in + 6 * stride));
+        in07 = _mm_load_si128((const __m128i *)(in + 7 * stride));
+        in08 = _mm_load_si128((const __m128i *)(in + 8 * stride));
+        in09 = _mm_load_si128((const __m128i *)(in + 9 * stride));
+        in10 = _mm_load_si128((const __m128i *)(in + 10 * stride));
+        in11 = _mm_load_si128((const __m128i *)(in + 11 * stride));
+        in12 = _mm_load_si128((const __m128i *)(in + 12 * stride));
+        in13 = _mm_load_si128((const __m128i *)(in + 13 * stride));
+        in14 = _mm_load_si128((const __m128i *)(in + 14 * stride));
+        in15 = _mm_load_si128((const __m128i *)(in + 15 * stride));
+        // x = x << 2
+        in00 = _mm_slli_epi16(in00, 2);
+        in01 = _mm_slli_epi16(in01, 2);
+        in02 = _mm_slli_epi16(in02, 2);
+        in03 = _mm_slli_epi16(in03, 2);
+        in04 = _mm_slli_epi16(in04, 2);
+        in05 = _mm_slli_epi16(in05, 2);
+        in06 = _mm_slli_epi16(in06, 2);
+        in07 = _mm_slli_epi16(in07, 2);
+        in08 = _mm_slli_epi16(in08, 2);
+        in09 = _mm_slli_epi16(in09, 2);
+        in10 = _mm_slli_epi16(in10, 2);
+        in11 = _mm_slli_epi16(in11, 2);
+        in12 = _mm_slli_epi16(in12, 2);
+        in13 = _mm_slli_epi16(in13, 2);
+        in14 = _mm_slli_epi16(in14, 2);
+        in15 = _mm_slli_epi16(in15, 2);
+      } else {
+        in00 = _mm_load_si128((const __m128i *)(in + 0 * 16));
+        in01 = _mm_load_si128((const __m128i *)(in + 1 * 16));
+        in02 = _mm_load_si128((const __m128i *)(in + 2 * 16));
+        in03 = _mm_load_si128((const __m128i *)(in + 3 * 16));
+        in04 = _mm_load_si128((const __m128i *)(in + 4 * 16));
+        in05 = _mm_load_si128((const __m128i *)(in + 5 * 16));
+        in06 = _mm_load_si128((const __m128i *)(in + 6 * 16));
+        in07 = _mm_load_si128((const __m128i *)(in + 7 * 16));
+        in08 = _mm_load_si128((const __m128i *)(in + 8 * 16));
+        in09 = _mm_load_si128((const __m128i *)(in + 9 * 16));
+        in10 = _mm_load_si128((const __m128i *)(in + 10 * 16));
+        in11 = _mm_load_si128((const __m128i *)(in + 11 * 16));
+        in12 = _mm_load_si128((const __m128i *)(in + 12 * 16));
+        in13 = _mm_load_si128((const __m128i *)(in + 13 * 16));
+        in14 = _mm_load_si128((const __m128i *)(in + 14 * 16));
+        in15 = _mm_load_si128((const __m128i *)(in + 15 * 16));
+        // x = (x + 1) >> 2
+        in00 = _mm_add_epi16(in00, kOne);
+        in01 = _mm_add_epi16(in01, kOne);
+        in02 = _mm_add_epi16(in02, kOne);
+        in03 = _mm_add_epi16(in03, kOne);
+        in04 = _mm_add_epi16(in04, kOne);
+        in05 = _mm_add_epi16(in05, kOne);
+        in06 = _mm_add_epi16(in06, kOne);
+        in07 = _mm_add_epi16(in07, kOne);
+        in08 = _mm_add_epi16(in08, kOne);
+        in09 = _mm_add_epi16(in09, kOne);
+        in10 = _mm_add_epi16(in10, kOne);
+        in11 = _mm_add_epi16(in11, kOne);
+        in12 = _mm_add_epi16(in12, kOne);
+        in13 = _mm_add_epi16(in13, kOne);
+        in14 = _mm_add_epi16(in14, kOne);
+        in15 = _mm_add_epi16(in15, kOne);
+        in00 = _mm_srai_epi16(in00, 2);
+        in01 = _mm_srai_epi16(in01, 2);
+        in02 = _mm_srai_epi16(in02, 2);
+        in03 = _mm_srai_epi16(in03, 2);
+        in04 = _mm_srai_epi16(in04, 2);
+        in05 = _mm_srai_epi16(in05, 2);
+        in06 = _mm_srai_epi16(in06, 2);
+        in07 = _mm_srai_epi16(in07, 2);
+        in08 = _mm_srai_epi16(in08, 2);
+        in09 = _mm_srai_epi16(in09, 2);
+        in10 = _mm_srai_epi16(in10, 2);
+        in11 = _mm_srai_epi16(in11, 2);
+        in12 = _mm_srai_epi16(in12, 2);
+        in13 = _mm_srai_epi16(in13, 2);
+        in14 = _mm_srai_epi16(in14, 2);
+        in15 = _mm_srai_epi16(in15, 2);
+      }
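+      // Advance the input pointer to the next group of eight columns
+      // (transposed rows in the second pass).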
+      in += 8;
+      // Calculate input for the first 8 results.
+      {
+        input0 = ADD_EPI16(in00, in15);
+        input1 = ADD_EPI16(in01, in14);
+        input2 = ADD_EPI16(in02, in13);
+        input3 = ADD_EPI16(in03, in12);
+        input4 = ADD_EPI16(in04, in11);
+        input5 = ADD_EPI16(in05, in10);
+        input6 = ADD_EPI16(in06, in09);
+        input7 = ADD_EPI16(in07, in08);
+#if DCT_HIGH_BIT_DEPTH
+        overflow = check_epi16_overflow_x8(&input0, &input1, &input2, &input3,
+                                           &input4, &input5, &input6, &input7);
+        if (overflow) {
+          vpx_highbd_fdct16x16_c(input, output, stride);
+          return;
+        }
+#endif  // DCT_HIGH_BIT_DEPTH
+      }
+      // Calculate input for the next 8 results.
+      {
+        step1_0 = SUB_EPI16(in07, in08);
+        step1_1 = SUB_EPI16(in06, in09);
+        step1_2 = SUB_EPI16(in05, in10);
+        step1_3 = SUB_EPI16(in04, in11);
+        step1_4 = SUB_EPI16(in03, in12);
+        step1_5 = SUB_EPI16(in02, in13);
+        step1_6 = SUB_EPI16(in01, in14);
+        step1_7 = SUB_EPI16(in00, in15);
+#if DCT_HIGH_BIT_DEPTH
+        overflow =
+            check_epi16_overflow_x8(&step1_0, &step1_1, &step1_2, &step1_3,
+                                    &step1_4, &step1_5, &step1_6, &step1_7);
+        if (overflow) {
+          vpx_highbd_fdct16x16_c(input, output, stride);
+          return;
+        }
+#endif  // DCT_HIGH_BIT_DEPTH
+      }
+      // Work on the first eight values; fdct8(input, even_results);
+      {
+        // Add/subtract
+        const __m128i q0 = ADD_EPI16(input0, input7);
+        const __m128i q1 = ADD_EPI16(input1, input6);
+        const __m128i q2 = ADD_EPI16(input2, input5);
+        const __m128i q3 = ADD_EPI16(input3, input4);
+        const __m128i q4 = SUB_EPI16(input3, input4);
+        const __m128i q5 = SUB_EPI16(input2, input5);
+        const __m128i q6 = SUB_EPI16(input1, input6);
+        const __m128i q7 = SUB_EPI16(input0, input7);
+#if DCT_HIGH_BIT_DEPTH
+        overflow =
+            check_epi16_overflow_x8(&q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7);
+        if (overflow) {
+          vpx_highbd_fdct16x16_c(input, output, stride);
+          return;
+        }
+#endif  // DCT_HIGH_BIT_DEPTH
+        // Work on first four results
+        {
+          // Add/subtract
+          const __m128i r0 = ADD_EPI16(q0, q3);
+          const __m128i r1 = ADD_EPI16(q1, q2);
+          const __m128i r2 = SUB_EPI16(q1, q2);
+          const __m128i r3 = SUB_EPI16(q0, q3);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x4(&r0, &r1, &r2, &r3);
+          if (overflow) {
+            vpx_highbd_fdct16x16_c(input, output, stride);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+          // Interleave to do the multiply by constants which gets us
+          // into 32 bits.
+          {
+            const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
+            const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
+            const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
+            const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
+            res00 = mult_round_shift(&t0, &t1, &k__cospi_p16_p16,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+            res08 = mult_round_shift(&t0, &t1, &k__cospi_p16_m16,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+            res04 = mult_round_shift(&t2, &t3, &k__cospi_p24_p08,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+            res12 = mult_round_shift(&t2, &t3, &k__cospi_m08_p24,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+            overflow = check_epi16_overflow_x4(&res00, &res08, &res04, &res12);
+            if (overflow) {
+              vpx_highbd_fdct16x16_c(input, output, stride);
+              return;
+            }
+#endif  // DCT_HIGH_BIT_DEPTH
+          }
+        }
+        // Work on next four results
+        {
+          // Interleave to do the multiply by constants which gets us
+          // into 32 bits.
+          const __m128i d0 = _mm_unpacklo_epi16(q6, q5);
+          const __m128i d1 = _mm_unpackhi_epi16(q6, q5);
+          const __m128i r0 =
+              mult_round_shift(&d0, &d1, &k__cospi_p16_m16,
+                               &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          const __m128i r1 =
+              mult_round_shift(&d0, &d1, &k__cospi_p16_p16,
+                               &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x2(&r0, &r1);
+          if (overflow) {
+            vpx_highbd_fdct16x16_c(input, output, stride);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+          {
+            // Add/subtract
+            const __m128i x0 = ADD_EPI16(q4, r0);
+            const __m128i x1 = SUB_EPI16(q4, r0);
+            const __m128i x2 = SUB_EPI16(q7, r1);
+            const __m128i x3 = ADD_EPI16(q7, r1);
+#if DCT_HIGH_BIT_DEPTH
+            overflow = check_epi16_overflow_x4(&x0, &x1, &x2, &x3);
+            if (overflow) {
+              vpx_highbd_fdct16x16_c(input, output, stride);
+              return;
+            }
+#endif  // DCT_HIGH_BIT_DEPTH
+            // Interleave to do the multiply by constants which gets us
+            // into 32 bits.
+            {
+              const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
+              const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
+              const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
+              const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
+              res02 = mult_round_shift(&t0, &t1, &k__cospi_p28_p04,
+                                       &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+              res14 = mult_round_shift(&t0, &t1, &k__cospi_m04_p28,
+                                       &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+              res10 = mult_round_shift(&t2, &t3, &k__cospi_p12_p20,
+                                       &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+              res06 = mult_round_shift(&t2, &t3, &k__cospi_m20_p12,
+                                       &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+              overflow =
+                  check_epi16_overflow_x4(&res02, &res14, &res10, &res06);
+              if (overflow) {
+                vpx_highbd_fdct16x16_c(input, output, stride);
+                return;
+              }
+#endif  // DCT_HIGH_BIT_DEPTH
+            }
+          }
+        }
+      }
+      // Work on the next eight values; step1 -> odd_results
+      {
+        // step 2
+        {
+          const __m128i t0 = _mm_unpacklo_epi16(step1_5, step1_2);
+          const __m128i t1 = _mm_unpackhi_epi16(step1_5, step1_2);
+          const __m128i t2 = _mm_unpacklo_epi16(step1_4, step1_3);
+          const __m128i t3 = _mm_unpackhi_epi16(step1_4, step1_3);
+          step2_2 = mult_round_shift(&t0, &t1, &k__cospi_p16_m16,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          step2_3 = mult_round_shift(&t2, &t3, &k__cospi_p16_m16,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          step2_5 = mult_round_shift(&t0, &t1, &k__cospi_p16_p16,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          step2_4 = mult_round_shift(&t2, &t3, &k__cospi_p16_p16,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x4(&step2_2, &step2_3, &step2_5, &step2_4);
+          if (overflow) {
+            vpx_highbd_fdct16x16_c(input, output, stride);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        // step 3
+        {
+          step3_0 = ADD_EPI16(step1_0, step2_3);
+          step3_1 = ADD_EPI16(step1_1, step2_2);
+          step3_2 = SUB_EPI16(step1_1, step2_2);
+          step3_3 = SUB_EPI16(step1_0, step2_3);
+          step3_4 = SUB_EPI16(step1_7, step2_4);
+          step3_5 = SUB_EPI16(step1_6, step2_5);
+          step3_6 = ADD_EPI16(step1_6, step2_5);
+          step3_7 = ADD_EPI16(step1_7, step2_4);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x8(&step3_0, &step3_1, &step3_2, &step3_3,
+                                      &step3_4, &step3_5, &step3_6, &step3_7);
+          if (overflow) {
+            vpx_highbd_fdct16x16_c(input, output, stride);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        // step 4
+        {
+          const __m128i t0 = _mm_unpacklo_epi16(step3_1, step3_6);
+          const __m128i t1 = _mm_unpackhi_epi16(step3_1, step3_6);
+          const __m128i t2 = _mm_unpacklo_epi16(step3_2, step3_5);
+          const __m128i t3 = _mm_unpackhi_epi16(step3_2, step3_5);
+          step2_1 = mult_round_shift(&t0, &t1, &k__cospi_m08_p24,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          step2_2 = mult_round_shift(&t2, &t3, &k__cospi_p24_p08,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          step2_6 = mult_round_shift(&t0, &t1, &k__cospi_p24_p08,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          step2_5 = mult_round_shift(&t2, &t3, &k__cospi_p08_m24,
+                                     &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x4(&step2_1, &step2_2, &step2_6, &step2_5);
+          if (overflow) {
+            vpx_highbd_fdct16x16_c(input, output, stride);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        // step 5
+        {
+          step1_0 = ADD_EPI16(step3_0, step2_1);
+          step1_1 = SUB_EPI16(step3_0, step2_1);
+          step1_2 = ADD_EPI16(step3_3, step2_2);
+          step1_3 = SUB_EPI16(step3_3, step2_2);
+          step1_4 = SUB_EPI16(step3_4, step2_5);
+          step1_5 = ADD_EPI16(step3_4, step2_5);
+          step1_6 = SUB_EPI16(step3_7, step2_6);
+          step1_7 = ADD_EPI16(step3_7, step2_6);
+#if DCT_HIGH_BIT_DEPTH
+          overflow =
+              check_epi16_overflow_x8(&step1_0, &step1_1, &step1_2, &step1_3,
+                                      &step1_4, &step1_5, &step1_6, &step1_7);
+          if (overflow) {
+            vpx_highbd_fdct16x16_c(input, output, stride);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        // step 6
+        {
+          const __m128i t0 = _mm_unpacklo_epi16(step1_0, step1_7);
+          const __m128i t1 = _mm_unpackhi_epi16(step1_0, step1_7);
+          const __m128i t2 = _mm_unpacklo_epi16(step1_1, step1_6);
+          const __m128i t3 = _mm_unpackhi_epi16(step1_1, step1_6);
+          res01 = mult_round_shift(&t0, &t1, &k__cospi_p30_p02,
+                                   &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          res09 = mult_round_shift(&t2, &t3, &k__cospi_p14_p18,
+                                   &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          res15 = mult_round_shift(&t0, &t1, &k__cospi_m02_p30,
+                                   &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          res07 = mult_round_shift(&t2, &t3, &k__cospi_m18_p14,
+                                   &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x4(&res01, &res09, &res15, &res07);
+          if (overflow) {
+            vpx_highbd_fdct16x16_c(input, output, stride);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+        {
+          const __m128i t0 = _mm_unpacklo_epi16(step1_2, step1_5);
+          const __m128i t1 = _mm_unpackhi_epi16(step1_2, step1_5);
+          const __m128i t2 = _mm_unpacklo_epi16(step1_3, step1_4);
+          const __m128i t3 = _mm_unpackhi_epi16(step1_3, step1_4);
+          res05 = mult_round_shift(&t0, &t1, &k__cospi_p22_p10,
+                                   &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          res13 = mult_round_shift(&t2, &t3, &k__cospi_p06_p26,
+                                   &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          res11 = mult_round_shift(&t0, &t1, &k__cospi_m10_p22,
+                                   &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+          res03 = mult_round_shift(&t2, &t3, &k__cospi_m26_p06,
+                                   &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+          overflow = check_epi16_overflow_x4(&res05, &res13, &res11, &res03);
+          if (overflow) {
+            vpx_highbd_fdct16x16_c(input, output, stride);
+            return;
+          }
+#endif  // DCT_HIGH_BIT_DEPTH
+        }
+      }
+      // Transpose the results, do it as two 8x8 transposes.
+      transpose_and_output8x8(&res00, &res01, &res02, &res03, &res04, &res05,
+                              &res06, &res07, pass, out0, out1);
+      transpose_and_output8x8(&res08, &res09, &res10, &res11, &res12, &res13,
+                              &res14, &res15, pass, out0 + 8, out1 + 8);
+      if (pass == 0) {
+        out0 += 8 * 16;
+      } else {
+        out1 += 8 * 16;
+      }
+    }
+    // Setup in/out for next pass.
+    in = intermediate;
+  }
+}
+
+#undef ADD_EPI16
+#undef SUB_EPI16
diff --git a/av1/common/x86/vp10_fwd_txfm_sse2.c b/av1/common/x86/vp10_fwd_txfm_sse2.c
new file mode 100644
index 0000000..05ec539
--- /dev/null
+++ b/av1/common/x86/vp10_fwd_txfm_sse2.c
@@ -0,0 +1,272 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h>  // SSE2
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_dsp/x86/fwd_txfm_sse2.h"
+
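+// The *_1 variants below compute only the DC (0, 0) coefficient: they sum
+// all input samples and apply a transform-size-dependent scaling (<< 1 for
+// 4x4, none for 8x8, >> 1 for 16x16, >> 3 for 32x32) so that output[0]
+// matches the DC term of the corresponding full transform.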
+void vp10_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
+  __m128i in0, in1;
+  __m128i tmp;
+  const __m128i zero = _mm_setzero_si128();
+  in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
+  in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+  in1 = _mm_unpacklo_epi64(
+      in1, _mm_loadl_epi64((const __m128i *)(input + 2 * stride)));
+  in0 = _mm_unpacklo_epi64(
+      in0, _mm_loadl_epi64((const __m128i *)(input + 3 * stride)));
+
+  tmp = _mm_add_epi16(in0, in1);
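+  // Interleaving with zero places each 16-bit sum in the high half of a
+  // 32-bit lane; the arithmetic shift right by 16 then sign-extends it
+  // back down to a 32-bit value.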
+  in0 = _mm_unpacklo_epi16(zero, tmp);
+  in1 = _mm_unpackhi_epi16(zero, tmp);
+  in0 = _mm_srai_epi32(in0, 16);
+  in1 = _mm_srai_epi32(in1, 16);
+
+  tmp = _mm_add_epi32(in0, in1);
+  in0 = _mm_unpacklo_epi32(tmp, zero);
+  in1 = _mm_unpackhi_epi32(tmp, zero);
+
+  tmp = _mm_add_epi32(in0, in1);
+  in0 = _mm_srli_si128(tmp, 8);
+
+  in1 = _mm_add_epi32(tmp, in0);
+  in0 = _mm_slli_epi32(in1, 1);
+  store_output(&in0, output);
+}
+
+void vp10_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
+  __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+  __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+  __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+  __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+  __m128i u0, u1, sum;
+
+  u0 = _mm_add_epi16(in0, in1);
+  u1 = _mm_add_epi16(in2, in3);
+
+  in0 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+  in1 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+  in2 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+  in3 = _mm_load_si128((const __m128i *)(input + 7 * stride));
+
+  sum = _mm_add_epi16(u0, u1);
+
+  in0 = _mm_add_epi16(in0, in1);
+  in2 = _mm_add_epi16(in2, in3);
+  sum = _mm_add_epi16(sum, in0);
+
+  u0 = _mm_setzero_si128();
+  sum = _mm_add_epi16(sum, in2);
+
+  in0 = _mm_unpacklo_epi16(u0, sum);
+  in1 = _mm_unpackhi_epi16(u0, sum);
+  in0 = _mm_srai_epi32(in0, 16);
+  in1 = _mm_srai_epi32(in1, 16);
+
+  sum = _mm_add_epi32(in0, in1);
+  in0 = _mm_unpacklo_epi32(sum, u0);
+  in1 = _mm_unpackhi_epi32(sum, u0);
+
+  sum = _mm_add_epi32(in0, in1);
+  in0 = _mm_srli_si128(sum, 8);
+
+  in1 = _mm_add_epi32(sum, in0);
+  store_output(&in1, output);
+}
+
+void vp10_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
+                           int stride) {
+  __m128i in0, in1, in2, in3;
+  __m128i u0, u1;
+  __m128i sum = _mm_setzero_si128();
+  int i;
+
+  for (i = 0; i < 2; ++i) {
+    input += 8 * i;
+    in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+    in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+    in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+    in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+
+    u0 = _mm_add_epi16(in0, in1);
+    u1 = _mm_add_epi16(in2, in3);
+    sum = _mm_add_epi16(sum, u0);
+
+    in0 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+    in1 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+    in2 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+    in3 = _mm_load_si128((const __m128i *)(input + 7 * stride));
+
+    sum = _mm_add_epi16(sum, u1);
+    u0 = _mm_add_epi16(in0, in1);
+    u1 = _mm_add_epi16(in2, in3);
+    sum = _mm_add_epi16(sum, u0);
+
+    in0 = _mm_load_si128((const __m128i *)(input + 8 * stride));
+    in1 = _mm_load_si128((const __m128i *)(input + 9 * stride));
+    in2 = _mm_load_si128((const __m128i *)(input + 10 * stride));
+    in3 = _mm_load_si128((const __m128i *)(input + 11 * stride));
+
+    sum = _mm_add_epi16(sum, u1);
+    u0 = _mm_add_epi16(in0, in1);
+    u1 = _mm_add_epi16(in2, in3);
+    sum = _mm_add_epi16(sum, u0);
+
+    in0 = _mm_load_si128((const __m128i *)(input + 12 * stride));
+    in1 = _mm_load_si128((const __m128i *)(input + 13 * stride));
+    in2 = _mm_load_si128((const __m128i *)(input + 14 * stride));
+    in3 = _mm_load_si128((const __m128i *)(input + 15 * stride));
+
+    sum = _mm_add_epi16(sum, u1);
+    u0 = _mm_add_epi16(in0, in1);
+    u1 = _mm_add_epi16(in2, in3);
+    sum = _mm_add_epi16(sum, u0);
+
+    sum = _mm_add_epi16(sum, u1);
+  }
+
+  u0 = _mm_setzero_si128();
+  in0 = _mm_unpacklo_epi16(u0, sum);
+  in1 = _mm_unpackhi_epi16(u0, sum);
+  in0 = _mm_srai_epi32(in0, 16);
+  in1 = _mm_srai_epi32(in1, 16);
+
+  sum = _mm_add_epi32(in0, in1);
+  in0 = _mm_unpacklo_epi32(sum, u0);
+  in1 = _mm_unpackhi_epi32(sum, u0);
+
+  sum = _mm_add_epi32(in0, in1);
+  in0 = _mm_srli_si128(sum, 8);
+
+  in1 = _mm_add_epi32(sum, in0);
+  in1 = _mm_srai_epi32(in1, 1);
+  store_output(&in1, output);
+}
+
+void vp10_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
+                           int stride) {
+  __m128i in0, in1, in2, in3;
+  __m128i u0, u1;
+  __m128i sum = _mm_setzero_si128();
+  int i;
+
+  for (i = 0; i < 8; ++i) {
+    in0 = _mm_load_si128((const __m128i *)(input + 0));
+    in1 = _mm_load_si128((const __m128i *)(input + 8));
+    in2 = _mm_load_si128((const __m128i *)(input + 16));
+    in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+    input += stride;
+    u0 = _mm_add_epi16(in0, in1);
+    u1 = _mm_add_epi16(in2, in3);
+    sum = _mm_add_epi16(sum, u0);
+
+    in0 = _mm_load_si128((const __m128i *)(input + 0));
+    in1 = _mm_load_si128((const __m128i *)(input + 8));
+    in2 = _mm_load_si128((const __m128i *)(input + 16));
+    in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+    input += stride;
+    sum = _mm_add_epi16(sum, u1);
+    u0 = _mm_add_epi16(in0, in1);
+    u1 = _mm_add_epi16(in2, in3);
+    sum = _mm_add_epi16(sum, u0);
+
+    in0 = _mm_load_si128((const __m128i *)(input + 0));
+    in1 = _mm_load_si128((const __m128i *)(input + 8));
+    in2 = _mm_load_si128((const __m128i *)(input + 16));
+    in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+    input += stride;
+    sum = _mm_add_epi16(sum, u1);
+    u0 = _mm_add_epi16(in0, in1);
+    u1 = _mm_add_epi16(in2, in3);
+    sum = _mm_add_epi16(sum, u0);
+
+    in0 = _mm_load_si128((const __m128i *)(input + 0));
+    in1 = _mm_load_si128((const __m128i *)(input + 8));
+    in2 = _mm_load_si128((const __m128i *)(input + 16));
+    in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+    input += stride;
+    sum = _mm_add_epi16(sum, u1);
+    u0 = _mm_add_epi16(in0, in1);
+    u1 = _mm_add_epi16(in2, in3);
+    sum = _mm_add_epi16(sum, u0);
+
+    sum = _mm_add_epi16(sum, u1);
+  }
+
+  u0 = _mm_setzero_si128();
+  in0 = _mm_unpacklo_epi16(u0, sum);
+  in1 = _mm_unpackhi_epi16(u0, sum);
+  in0 = _mm_srai_epi32(in0, 16);
+  in1 = _mm_srai_epi32(in1, 16);
+
+  sum = _mm_add_epi32(in0, in1);
+  in0 = _mm_unpacklo_epi32(sum, u0);
+  in1 = _mm_unpackhi_epi32(sum, u0);
+
+  sum = _mm_add_epi32(in0, in1);
+  in0 = _mm_srli_si128(sum, 8);
+
+  in1 = _mm_add_epi32(sum, in0);
+  in1 = _mm_srai_epi32(in1, 3);
+  store_output(&in1, output);
+}
+
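+// The full 2-D transforms below are instantiated from shared implementation
+// headers: each FDCTNxN_2D macro names the function the included header will
+// define, and DCT_HIGH_BIT_DEPTH selects the overflow-checked code paths
+// used by the high-bit-depth build.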
+#define DCT_HIGH_BIT_DEPTH 0
+#define FDCT4x4_2D vp10_fdct4x4_sse2
+#define FDCT8x8_2D vp10_fdct8x8_sse2
+#define FDCT16x16_2D vp10_fdct16x16_sse2
+#include "av1/common/x86/vp10_fwd_txfm_impl_sse2.h"
+#undef FDCT4x4_2D
+#undef FDCT8x8_2D
+#undef FDCT16x16_2D
+
+#define FDCT32x32_2D vp10_fdct32x32_rd_sse2
+#define FDCT32x32_HIGH_PRECISION 0
+#include "av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h"
+#undef FDCT32x32_2D
+#undef FDCT32x32_HIGH_PRECISION
+
+#define FDCT32x32_2D vp10_fdct32x32_sse2
+#define FDCT32x32_HIGH_PRECISION 1
+#include "av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h"  // NOLINT
+#undef FDCT32x32_2D
+#undef FDCT32x32_HIGH_PRECISION
+#undef DCT_HIGH_BIT_DEPTH
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#define DCT_HIGH_BIT_DEPTH 1
+#define FDCT4x4_2D vp10_highbd_fdct4x4_sse2
+#define FDCT8x8_2D vp10_highbd_fdct8x8_sse2
+#define FDCT16x16_2D vp10_highbd_fdct16x16_sse2
+#include "av1/common/x86/vp10_fwd_txfm_impl_sse2.h"  // NOLINT
+#undef FDCT4x4_2D
+#undef FDCT8x8_2D
+#undef FDCT16x16_2D
+
+#define FDCT32x32_2D vp10_highbd_fdct32x32_rd_sse2
+#define FDCT32x32_HIGH_PRECISION 0
+#include "av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h"  // NOLINT
+#undef FDCT32x32_2D
+#undef FDCT32x32_HIGH_PRECISION
+
+#define FDCT32x32_2D vp10_highbd_fdct32x32_sse2
+#define FDCT32x32_HIGH_PRECISION 1
+#include "av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h"  // NOLINT
+#undef FDCT32x32_2D
+#undef FDCT32x32_HIGH_PRECISION
+#undef DCT_HIGH_BIT_DEPTH
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/x86/vp10_highbd_convolve_filters_sse4.c b/av1/common/x86/vp10_highbd_convolve_filters_sse4.c
new file mode 100644
index 0000000..7f3630c
--- /dev/null
+++ b/av1/common/x86/vp10_highbd_convolve_filters_sse4.c
@@ -0,0 +1,394 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#include "av1/common/filter.h"
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_EXT_INTERP
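+// Vertical (signal-direction) filter taps pre-arranged for SIMD: 15
+// fractional phases (the zero phase needs no filtering and is not stored;
+// callers index with subpel - 1), each holding 6 tap pairs with every pair
+// broadcast four times so a single 128-bit load feeds _mm_madd_epi16.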
+DECLARE_ALIGNED(16, const int16_t,
+                sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8]) = {
+  {
+      { 0, 0, 0, 0, 0, 0, 0, 0 },
+      { -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 127, -6, 127, -6, 127, -6, 127 },
+      { 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1 },
+      { 0, 0, 0, 0, 0, 0, 0, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -2, 5, -2, 5, -2, 5, -2, 5 },
+      { -12, 124, -12, 124, -12, 124, -12, 124 },
+      { 18, -7, 18, -7, 18, -7, 18, -7 },
+      { 3, -2, 3, -2, 3, -2, 3, -2 },
+      { 0, 0, 0, 0, 0, 0, 0, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -3, 7, -3, 7, -3, 7, -3, 7 },
+      { -17, 119, -17, 119, -17, 119, -17, 119 },
+      { 28, -11, 28, -11, 28, -11, 28, -11 },
+      { 5, -2, 5, -2, 5, -2, 5, -2 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -4, 8, -4, 8, -4, 8, -4, 8 },
+      { -20, 114, -20, 114, -20, 114, -20, 114 },
+      { 38, -14, 38, -14, 38, -14, 38, -14 },
+      { 7, -3, 7, -3, 7, -3, 7, -3 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -4, 9, -4, 9, -4, 9, -4, 9 },
+      { -22, 107, -22, 107, -22, 107, -22, 107 },
+      { 49, -17, 49, -17, 49, -17, 49, -17 },
+      { 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 2, 0, 2, 0, 2, 0, 2 },
+      { -5, 10, -5, 10, -5, 10, -5, 10 },
+      { -24, 99, -24, 99, -24, 99, -24, 99 },
+      { 59, -20, 59, -20, 59, -20, 59, -20 },
+      { 9, -4, 9, -4, 9, -4, 9, -4 },
+      { 2, 0, 2, 0, 2, 0, 2, 0 },
+  },
+  {
+      { 0, 2, 0, 2, 0, 2, 0, 2 },
+      { -5, 10, -5, 10, -5, 10, -5, 10 },
+      { -24, 90, -24, 90, -24, 90, -24, 90 },
+      { 70, -22, 70, -22, 70, -22, 70, -22 },
+      { 10, -5, 10, -5, 10, -5, 10, -5 },
+      { 2, 0, 2, 0, 2, 0, 2, 0 },
+  },
+  {
+      { 0, 2, 0, 2, 0, 2, 0, 2 },
+      { -5, 10, -5, 10, -5, 10, -5, 10 },
+      { -23, 80, -23, 80, -23, 80, -23, 80 },
+      { 80, -23, 80, -23, 80, -23, 80, -23 },
+      { 10, -5, 10, -5, 10, -5, 10, -5 },
+      { 2, 0, 2, 0, 2, 0, 2, 0 },
+  },
+  {
+      { 0, 2, 0, 2, 0, 2, 0, 2 },
+      { -5, 10, -5, 10, -5, 10, -5, 10 },
+      { -22, 70, -22, 70, -22, 70, -22, 70 },
+      { 90, -24, 90, -24, 90, -24, 90, -24 },
+      { 10, -5, 10, -5, 10, -5, 10, -5 },
+      { 2, 0, 2, 0, 2, 0, 2, 0 },
+  },
+  {
+      { 0, 2, 0, 2, 0, 2, 0, 2 },
+      { -4, 9, -4, 9, -4, 9, -4, 9 },
+      { -20, 59, -20, 59, -20, 59, -20, 59 },
+      { 99, -24, 99, -24, 99, -24, 99, -24 },
+      { 10, -5, 10, -5, 10, -5, 10, -5 },
+      { 2, 0, 2, 0, 2, 0, 2, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -4, 8, -4, 8, -4, 8, -4, 8 },
+      { -17, 49, -17, 49, -17, 49, -17, 49 },
+      { 107, -22, 107, -22, 107, -22, 107, -22 },
+      { 9, -4, 9, -4, 9, -4, 9, -4 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -3, 7, -3, 7, -3, 7, -3, 7 },
+      { -14, 38, -14, 38, -14, 38, -14, 38 },
+      { 114, -20, 114, -20, 114, -20, 114, -20 },
+      { 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -2, 5, -2, 5, -2, 5, -2, 5 },
+      { -11, 28, -11, 28, -11, 28, -11, 28 },
+      { 119, -17, 119, -17, 119, -17, 119, -17 },
+      { 7, -3, 7, -3, 7, -3, 7, -3 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 0, 0, 0, 0, 0, 0, 0 },
+      { -2, 3, -2, 3, -2, 3, -2, 3 },
+      { -7, 18, -7, 18, -7, 18, -7, 18 },
+      { 124, -12, 124, -12, 124, -12, 124, -12 },
+      { 5, -2, 5, -2, 5, -2, 5, -2 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 0, 0, 0, 0, 0, 0, 0 },
+      { -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 8, -4, 8, -4, 8, -4, 8 },
+      { 127, -6, 127, -6, 127, -6, 127, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1 },
+      { 0, 0, 0, 0, 0, 0, 0, 0 },
+  },
+};
+#endif  // CONFIG_EXT_INTERP
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_EXT_INTERP
+DECLARE_ALIGNED(16, const int16_t,
+                sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8]) = {
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -2, 3, -2, 3, -2, 3, -2, 3 },
+      { -7, 127, -7, 127, -7, 127, -7, 127 },
+      { 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -3, 6, -3, 6, -3, 6, -3, 6 },
+      { -13, 124, -13, 124, -13, 124, -13, 124 },
+      { 18, -8, 18, -8, 18, -8, 18, -8 },
+      { 4, -2, 4, -2, 4, -2, 4, -2 },
+      { 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -4, 8, -4, 8, -4, 8, -4, 8 },
+      { -18, 120, -18, 120, -18, 120, -18, 120 },
+      { 28, -12, 28, -12, 28, -12, 28, -12 },
+      { 7, -4, 7, -4, 7, -4, 7, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 10, -6, 10, -6, 10, -6, 10 },
+      { -21, 115, -21, 115, -21, 115, -21, 115 },
+      { 38, -15, 38, -15, 38, -15, 38, -15 },
+      { 8, -5, 8, -5, 8, -5, 8, -5 },
+      { 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -6, 12, -6, 12, -6, 12, -6, 12 },
+      { -24, 108, -24, 108, -24, 108, -24, 108 },
+      { 49, -18, 49, -18, 49, -18, 49, -18 },
+      { 10, -6, 10, -6, 10, -6, 10, -6 },
+      { 3, -2, 3, -2, 3, -2, 3, -2 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -7, 13, -7, 13, -7, 13, -7, 13 },
+      { -25, 100, -25, 100, -25, 100, -25, 100 },
+      { 60, -21, 60, -21, 60, -21, 60, -21 },
+      { 11, -7, 11, -7, 11, -7, 11, -7 },
+      { 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -7, 13, -7, 13, -7, 13, -7, 13 },
+      { -26, 91, -26, 91, -26, 91, -26, 91 },
+      { 71, -24, 71, -24, 71, -24, 71, -24 },
+      { 13, -7, 13, -7, 13, -7, 13, -7 },
+      { 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -7, 13, -7, 13, -7, 13, -7, 13 },
+      { -25, 81, -25, 81, -25, 81, -25, 81 },
+      { 81, -25, 81, -25, 81, -25, 81, -25 },
+      { 13, -7, 13, -7, 13, -7, 13, -7 },
+      { 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -7, 13, -7, 13, -7, 13, -7, 13 },
+      { -24, 71, -24, 71, -24, 71, -24, 71 },
+      { 91, -26, 91, -26, 91, -26, 91, -26 },
+      { 13, -7, 13, -7, 13, -7, 13, -7 },
+      { 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -7, 11, -7, 11, -7, 11, -7, 11 },
+      { -21, 60, -21, 60, -21, 60, -21, 60 },
+      { 100, -25, 100, -25, 100, -25, 100, -25 },
+      { 13, -7, 13, -7, 13, -7, 13, -7 },
+      { 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -2, 3, -2, 3, -2, 3, -2, 3 },
+      { -6, 10, -6, 10, -6, 10, -6, 10 },
+      { -18, 49, -18, 49, -18, 49, -18, 49 },
+      { 108, -24, 108, -24, 108, -24, 108, -24 },
+      { 12, -6, 12, -6, 12, -6, 12, -6 },
+      { 4, -2, 4, -2, 4, -2, 4, -2 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -5, 8, -5, 8, -5, 8, -5, 8 },
+      { -15, 38, -15, 38, -15, 38, -15, 38 },
+      { 115, -21, 115, -21, 115, -21, 115, -21 },
+      { 10, -6, 10, -6, 10, -6, 10, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 7, -4, 7, -4, 7, -4, 7 },
+      { -12, 28, -12, 28, -12, 28, -12, 28 },
+      { 120, -18, 120, -18, 120, -18, 120, -18 },
+      { 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -8, 18, -8, 18, -8, 18, -8, 18 },
+      { 124, -13, 124, -13, 124, -13, 124, -13 },
+      { 6, -3, 6, -3, 6, -3, 6, -3 },
+      { 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 8, -4, 8, -4, 8, -4, 8 },
+      { 127, -7, 127, -7, 127, -7, 127, -7 },
+      { 3, -2, 3, -2, 3, -2, 3, -2 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+};
+#endif  // CONFIG_EXT_INTERP
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP9_HIGHBITDEPTH
+#if USE_TEMPORALFILTER_12TAP
+DECLARE_ALIGNED(
+    16, const int16_t,
+    sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8]) = {
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -7, 127, -7, 127, -7, 127, -7, 127 },
+      { 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1 },
+      { 0, 0, 0, 0, 0, 0, 0, 0 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -3, 5, -3, 5, -3, 5, -3, 5 },
+      { -12, 124, -12, 124, -12, 124, -12, 124 },
+      { 18, -8, 18, -8, 18, -8, 18, -8 },
+      { 4, -2, 4, -2, 4, -2, 4, -2 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 8, -4, 8, -4, 8, -4, 8 },
+      { -17, 120, -17, 120, -17, 120, -17, 120 },
+      { 28, -11, 28, -11, 28, -11, 28, -11 },
+      { 6, -3, 6, -3, 6, -3, 6, -3 },
+      { 1, -1, 1, -1, 1, -1, 1, -1 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 10, -4, 10, -4, 10, -4, 10 },
+      { -21, 114, -21, 114, -21, 114, -21, 114 },
+      { 38, -15, 38, -15, 38, -15, 38, -15 },
+      { 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -5, 11, -5, 11, -5, 11, -5, 11 },
+      { -23, 107, -23, 107, -23, 107, -23, 107 },
+      { 49, -18, 49, -18, 49, -18, 49, -18 },
+      { 9, -5, 9, -5, 9, -5, 9, -5 },
+      { 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 12, -6, 12, -6, 12, -6, 12 },
+      { -25, 99, -25, 99, -25, 99, -25, 99 },
+      { 60, -21, 60, -21, 60, -21, 60, -21 },
+      { 11, -6, 11, -6, 11, -6, 11, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 12, -6, 12, -6, 12, -6, 12 },
+      { -25, 90, -25, 90, -25, 90, -25, 90 },
+      { 70, -23, 70, -23, 70, -23, 70, -23 },
+      { 12, -6, 12, -6, 12, -6, 12, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 12, -6, 12, -6, 12, -6, 12 },
+      { -24, 80, -24, 80, -24, 80, -24, 80 },
+      { 80, -24, 80, -24, 80, -24, 80, -24 },
+      { 12, -6, 12, -6, 12, -6, 12, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 12, -6, 12, -6, 12, -6, 12 },
+      { -23, 70, -23, 70, -23, 70, -23, 70 },
+      { 90, -25, 90, -25, 90, -25, 90, -25 },
+      { 12, -6, 12, -6, 12, -6, 12, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 3, -1, 3, -1, 3, -1, 3 },
+      { -6, 11, -6, 11, -6, 11, -6, 11 },
+      { -21, 60, -21, 60, -21, 60, -21, 60 },
+      { 99, -25, 99, -25, 99, -25, 99, -25 },
+      { 12, -6, 12, -6, 12, -6, 12, -6 },
+      { 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -5, 9, -5, 9, -5, 9, -5, 9 },
+      { -18, 49, -18, 49, -18, 49, -18, 49 },
+      { 107, -23, 107, -23, 107, -23, 107, -23 },
+      { 11, -5, 11, -5, 11, -5, 11, -5 },
+      { 3, -1, 3, -1, 3, -1, 3, -1 },
+  },
+  {
+      { -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 8, -4, 8, -4, 8, -4, 8 },
+      { -15, 38, -15, 38, -15, 38, -15, 38 },
+      { 114, -21, 114, -21, 114, -21, 114, -21 },
+      { 10, -4, 10, -4, 10, -4, 10, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { -1, 1, -1, 1, -1, 1, -1, 1 },
+      { -3, 6, -3, 6, -3, 6, -3, 6 },
+      { -11, 28, -11, 28, -11, 28, -11, 28 },
+      { 120, -17, 120, -17, 120, -17, 120, -17 },
+      { 8, -4, 8, -4, 8, -4, 8, -4 },
+      { 2, -1, 2, -1, 2, -1, 2, -1 },
+  },
+  {
+      { 0, 1, 0, 1, 0, 1, 0, 1 },
+      { -2, 4, -2, 4, -2, 4, -2, 4 },
+      { -8, 18, -8, 18, -8, 18, -8, 18 },
+      { 124, -12, 124, -12, 124, -12, 124, -12 },
+      { 5, -3, 5, -3, 5, -3, 5, -3 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+  {
+      { 0, 0, 0, 0, 0, 0, 0, 0 },
+      { -1, 2, -1, 2, -1, 2, -1, 2 },
+      { -4, 8, -4, 8, -4, 8, -4, 8 },
+      { 127, -7, 127, -7, 127, -7, 127, -7 },
+      { 3, -1, 3, -1, 3, -1, 3, -1 },
+      { 1, 0, 1, 0, 1, 0, 1, 0 },
+  },
+};
+#endif  // USE_TEMPORALFILTER_12TAP
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/x86/vp10_highbd_convolve_sse4.c b/av1/common/x86/vp10_highbd_convolve_sse4.c
new file mode 100644
index 0000000..ea78400
--- /dev/null
+++ b/av1/common/x86/vp10_highbd_convolve_sse4.c
@@ -0,0 +1,466 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <smmintrin.h>
+
+#include "./vp10_rtcd.h"
+#include "av1/common/filter.h"
+
+typedef void (*TransposeSave)(const int width, int pixelsNum, uint32_t *src,
+                              int src_stride, uint16_t *dst, int dst_stride,
+                              int bd);
+
+// pixelsNum 0: write all 4 rows of pixels
+//           1/2/3: write only the residual 1/2/3 rows
+static void writePixel(__m128i *u, int width, int pixelsNum, uint16_t *dst,
+                       int dst_stride) {
+  if (2 == width) {
+    if (0 == pixelsNum) {
+      *(int *)dst = _mm_cvtsi128_si32(u[0]);
+      *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]);
+      *(int *)(dst + 2 * dst_stride) = _mm_cvtsi128_si32(u[2]);
+      *(int *)(dst + 3 * dst_stride) = _mm_cvtsi128_si32(u[3]);
+    } else if (1 == pixelsNum) {
+      *(int *)dst = _mm_cvtsi128_si32(u[0]);
+    } else if (2 == pixelsNum) {
+      *(int *)dst = _mm_cvtsi128_si32(u[0]);
+      *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]);
+    } else if (3 == pixelsNum) {
+      *(int *)dst = _mm_cvtsi128_si32(u[0]);
+      *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]);
+      *(int *)(dst + 2 * dst_stride) = _mm_cvtsi128_si32(u[2]);
+    }
+  } else {
+    if (0 == pixelsNum) {
+      _mm_storel_epi64((__m128i *)dst, u[0]);
+      _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]);
+      _mm_storel_epi64((__m128i *)(dst + 2 * dst_stride), u[2]);
+      _mm_storel_epi64((__m128i *)(dst + 3 * dst_stride), u[3]);
+    } else if (1 == pixelsNum) {
+      _mm_storel_epi64((__m128i *)dst, u[0]);
+    } else if (2 == pixelsNum) {
+      _mm_storel_epi64((__m128i *)dst, u[0]);
+      _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]);
+    } else if (3 == pixelsNum) {
+      _mm_storel_epi64((__m128i *)dst, u[0]);
+      _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]);
+      _mm_storel_epi64((__m128i *)(dst + 2 * dst_stride), u[2]);
+    }
+  }
+}
+
+// Clip 16-bit pixels to [0, (1 << bd) - 1], with bd 10 or 12
+static void highbd_clip(__m128i *p, int numVecs, int bd) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i one = _mm_set1_epi16(1);
+  const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one);
+  __m128i clamped, mask;
+  int i;
+
+  for (i = 0; i < numVecs; i++) {
+    mask = _mm_cmpgt_epi16(p[i], max);
+    clamped = _mm_andnot_si128(mask, p[i]);
+    mask = _mm_and_si128(mask, max);
+    clamped = _mm_or_si128(mask, clamped);
+    mask = _mm_cmpgt_epi16(clamped, zero);
+    p[i] = _mm_and_si128(clamped, mask);
+  }
+}
+
+static void transClipPixel(uint32_t *src, int src_stride, __m128i *u, int bd) {
+  __m128i v0, v1;
+  __m128i rnd = _mm_set1_epi32(1 << (FILTER_BITS - 1));
+
+  u[0] = _mm_loadu_si128((__m128i const *)src);
+  u[1] = _mm_loadu_si128((__m128i const *)(src + src_stride));
+  u[2] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
+  u[3] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
+
+  u[0] = _mm_add_epi32(u[0], rnd);
+  u[1] = _mm_add_epi32(u[1], rnd);
+  u[2] = _mm_add_epi32(u[2], rnd);
+  u[3] = _mm_add_epi32(u[3], rnd);
+
+  u[0] = _mm_srai_epi32(u[0], FILTER_BITS);
+  u[1] = _mm_srai_epi32(u[1], FILTER_BITS);
+  u[2] = _mm_srai_epi32(u[2], FILTER_BITS);
+  u[3] = _mm_srai_epi32(u[3], FILTER_BITS);
+
+  u[0] = _mm_packus_epi32(u[0], u[1]);
+  u[1] = _mm_packus_epi32(u[2], u[3]);
+
+  highbd_clip(u, 2, bd);
+
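+  // Transpose the clipped 4x4 block of 16-bit results back to pixel order.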
+  v0 = _mm_unpacklo_epi16(u[0], u[1]);
+  v1 = _mm_unpackhi_epi16(u[0], u[1]);
+
+  u[0] = _mm_unpacklo_epi16(v0, v1);
+  u[2] = _mm_unpackhi_epi16(v0, v1);
+
+  u[1] = _mm_srli_si128(u[0], 8);
+  u[3] = _mm_srli_si128(u[2], 8);
+}
+
+// pixelsNum = 0     : all 4 rows of pixels will be saved.
+// pixelsNum = 1/2/3 : residual 1/2/3 rows of pixels will be saved.
+void trans_save_4x4(const int width, int pixelsNum, uint32_t *src,
+                    int src_stride, uint16_t *dst, int dst_stride, int bd) {
+  __m128i u[4];
+  transClipPixel(src, src_stride, u, bd);
+  writePixel(u, width, pixelsNum, dst, dst_stride);
+}
+
+void trans_accum_save_4x4(const int width, int pixelsNum, uint32_t *src,
+                          int src_stride, uint16_t *dst, int dst_stride,
+                          int bd) {
+  __m128i u[4], v[4];
+  const __m128i ones = _mm_set1_epi16(1);
+
+  transClipPixel(src, src_stride, u, bd);
+
+  v[0] = _mm_loadl_epi64((__m128i const *)dst);
+  v[1] = _mm_loadl_epi64((__m128i const *)(dst + dst_stride));
+  v[2] = _mm_loadl_epi64((__m128i const *)(dst + 2 * dst_stride));
+  v[3] = _mm_loadl_epi64((__m128i const *)(dst + 3 * dst_stride));
+
+  u[0] = _mm_add_epi16(u[0], v[0]);
+  u[1] = _mm_add_epi16(u[1], v[1]);
+  u[2] = _mm_add_epi16(u[2], v[2]);
+  u[3] = _mm_add_epi16(u[3], v[3]);
+
+  u[0] = _mm_add_epi16(u[0], ones);
+  u[1] = _mm_add_epi16(u[1], ones);
+  u[2] = _mm_add_epi16(u[2], ones);
+  u[3] = _mm_add_epi16(u[3], ones);
+
+  u[0] = _mm_srai_epi16(u[0], 1);
+  u[1] = _mm_srai_epi16(u[1], 1);
+  u[2] = _mm_srai_epi16(u[2], 1);
+  u[3] = _mm_srai_epi16(u[3], 1);
+
+  writePixel(u, width, pixelsNum, dst, dst_stride);
+}
+
+static TransposeSave transSaveTab[2] = { trans_save_4x4, trans_accum_save_4x4 };
+
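+// Regroup four rows of 16-bit pixels into six vectors of horizontally
+// adjacent pixel pairs (one pair per row in each vector), matching the
+// pair-broadcast layout of the filter coefficient tables.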
+static INLINE void transpose_pair(__m128i *in, __m128i *out) {
+  __m128i x0, x1;
+
+  x0 = _mm_unpacklo_epi32(in[0], in[1]);
+  x1 = _mm_unpacklo_epi32(in[2], in[3]);
+
+  out[0] = _mm_unpacklo_epi64(x0, x1);
+  out[1] = _mm_unpackhi_epi64(x0, x1);
+
+  x0 = _mm_unpackhi_epi32(in[0], in[1]);
+  x1 = _mm_unpackhi_epi32(in[2], in[3]);
+
+  out[2] = _mm_unpacklo_epi64(x0, x1);
+  out[3] = _mm_unpackhi_epi64(x0, x1);
+
+  x0 = _mm_unpacklo_epi32(in[4], in[5]);
+  x1 = _mm_unpacklo_epi32(in[6], in[7]);
+
+  out[4] = _mm_unpacklo_epi64(x0, x1);
+  out[5] = _mm_unpackhi_epi64(x0, x1);
+}
+
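+// Horizontally filter the same column position in four rows at once: load
+// 4 x 16 pixels, transpose them into tap-pair order, then accumulate six
+// _mm_madd_epi16 products; the four 32-bit sums (one per row) land in buf.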
+static void highbd_filter_horiz(const uint16_t *src, int src_stride, __m128i *f,
+                                int tapsNum, uint32_t *buf) {
+  __m128i u[8], v[6];
+
+  if (tapsNum == 10) {
+    src -= 1;
+  }
+
+  u[0] = _mm_loadu_si128((__m128i const *)src);
+  u[1] = _mm_loadu_si128((__m128i const *)(src + src_stride));
+  u[2] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
+  u[3] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
+
+  u[4] = _mm_loadu_si128((__m128i const *)(src + 8));
+  u[5] = _mm_loadu_si128((__m128i const *)(src + src_stride + 8));
+  u[6] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride + 8));
+  u[7] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride + 8));
+
+  transpose_pair(u, v);
+
+  u[0] = _mm_madd_epi16(v[0], f[0]);
+  u[1] = _mm_madd_epi16(v[1], f[1]);
+  u[2] = _mm_madd_epi16(v[2], f[2]);
+  u[3] = _mm_madd_epi16(v[3], f[3]);
+  u[4] = _mm_madd_epi16(v[4], f[4]);
+  u[5] = _mm_madd_epi16(v[5], f[5]);
+
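+  // u[6] + u[7] equals u[2] + u[3]; the min/max split fixes the order in
+  // which the middle products join the sum, presumably to mirror the
+  // addition order of the related saturating implementations.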
+  u[6] = _mm_min_epi32(u[2], u[3]);
+  u[7] = _mm_max_epi32(u[2], u[3]);
+
+  u[0] = _mm_add_epi32(u[0], u[1]);
+  u[0] = _mm_add_epi32(u[0], u[5]);
+  u[0] = _mm_add_epi32(u[0], u[4]);
+  u[0] = _mm_add_epi32(u[0], u[6]);
+  u[0] = _mm_add_epi32(u[0], u[7]);
+
+  _mm_storeu_si128((__m128i *)buf, u[0]);
+}
+
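+// Horizontal convolution: for each 4x4 block of outputs, filter four rows at
+// four successive columns into a scratch buffer, then transpose and store
+// (averaging with the destination when avg is set).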
+void vp10_highbd_convolve_horiz_sse4_1(const uint16_t *src, int src_stride,
+                                       uint16_t *dst, int dst_stride, int w,
+                                       int h,
+                                       const InterpFilterParams filter_params,
+                                       const int subpel_x_q4, int x_step_q4,
+                                       int avg, int bd) {
+  DECLARE_ALIGNED(16, uint32_t, temp[4 * 4]);
+  __m128i verf[6];
+  HbdSubpelFilterCoeffs vCoeffs;
+  const uint16_t *srcPtr;
+  const int tapsNum = filter_params.taps;
+  int i, col, count, blkResidu, blkHeight;
+  TransposeSave transSave = transSaveTab[avg];
+
+  if (0 == subpel_x_q4 || 16 != x_step_q4) {
+    vp10_highbd_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h,
+                                 filter_params, subpel_x_q4, x_step_q4, avg,
+                                 bd);
+    return;
+  }
+
+  vCoeffs =
+      vp10_hbd_get_subpel_filter_ver_signal_dir(filter_params, subpel_x_q4 - 1);
+  if (!vCoeffs) {
+    vp10_highbd_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h,
+                                 filter_params, subpel_x_q4, x_step_q4, avg,
+                                 bd);
+    return;
+  }
+
+  verf[0] = *((const __m128i *)(vCoeffs));
+  verf[1] = *((const __m128i *)(vCoeffs + 1));
+  verf[2] = *((const __m128i *)(vCoeffs + 2));
+  verf[3] = *((const __m128i *)(vCoeffs + 3));
+  verf[4] = *((const __m128i *)(vCoeffs + 4));
+  verf[5] = *((const __m128i *)(vCoeffs + 5));
+
+  src -= (tapsNum >> 1) - 1;
+  srcPtr = src;
+
+  count = 0;
+  blkHeight = h >> 2;
+  blkResidu = h & 3;
+
+  while (blkHeight != 0) {
+    for (col = 0; col < w; col += 4) {
+      for (i = 0; i < 4; ++i) {
+        highbd_filter_horiz(srcPtr, src_stride, verf, tapsNum, temp + (i * 4));
+        srcPtr += 1;
+      }
+      transSave(w, 0, temp, 4, dst + col, dst_stride, bd);
+    }
+    count++;
+    srcPtr = src + count * src_stride * 4;
+    dst += dst_stride * 4;
+    blkHeight--;
+  }
+
+  if (blkResidu == 0) return;
+
+  for (col = 0; col < w; col += 4) {
+    for (i = 0; i < 4; ++i) {
+      highbd_filter_horiz(srcPtr, src_stride, verf, tapsNum, temp + (i * 4));
+      srcPtr += 1;
+    }
+    transSave(w, blkResidu, temp, 4, dst + col, dst_stride, bd);
+  }
+}
+
+// Vertical convolution filter
+
+typedef void (*WritePixels)(__m128i *u, int bd, uint16_t *dst);
+
+static void highbdRndingPacks(__m128i *u) {
+  __m128i rnd = _mm_set1_epi32(1 << (FILTER_BITS - 1));
+  u[0] = _mm_add_epi32(u[0], rnd);
+  u[0] = _mm_srai_epi32(u[0], FILTER_BITS);
+  u[0] = _mm_packus_epi32(u[0], u[0]);
+}
+
+static void write2pixelsOnly(__m128i *u, int bd, uint16_t *dst) {
+  highbdRndingPacks(u);
+  highbd_clip(u, 1, bd);
+  *(uint32_t *)dst = _mm_cvtsi128_si32(u[0]);
+}
+
+static void write2pixelsAccum(__m128i *u, int bd, uint16_t *dst) {
+  __m128i v = _mm_loadl_epi64((__m128i const *)dst);
+  const __m128i ones = _mm_set1_epi16(1);
+
+  highbdRndingPacks(u);
+  highbd_clip(u, 1, bd);
+
+  v = _mm_add_epi16(v, u[0]);
+  v = _mm_add_epi16(v, ones);
+  v = _mm_srai_epi16(v, 1);
+  *(uint32_t *)dst = _mm_cvtsi128_si32(v);
+}
+
+WritePixels write2pixelsTab[2] = { write2pixelsOnly, write2pixelsAccum };
+
+static void write4pixelsOnly(__m128i *u, int bd, uint16_t *dst) {
+  highbdRndingPacks(u);
+  highbd_clip(u, 1, bd);
+  _mm_storel_epi64((__m128i *)dst, u[0]);
+}
+
+static void write4pixelsAccum(__m128i *u, int bd, uint16_t *dst) {
+  __m128i v = _mm_loadl_epi64((__m128i const *)dst);
+  const __m128i ones = _mm_set1_epi16(1);
+
+  highbdRndingPacks(u);
+  highbd_clip(u, 1, bd);
+
+  v = _mm_add_epi16(v, u[0]);
+  v = _mm_add_epi16(v, ones);
+  v = _mm_srai_epi16(v, 1);
+  _mm_storel_epi64((__m128i *)dst, v);
+}
+
+WritePixels write4pixelsTab[2] = { write4pixelsOnly, write4pixelsAccum };
+
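+// Compute one output row of 4 (or 2) pixels of the vertical filter: load up
+// to 12 source rows, interleave them into row pairs, and accumulate six
+// _mm_madd_epi16 products; 10-tap filters zero the first row slot.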
+static void filter_vert_horiz_parallel(const uint16_t *src, int src_stride,
+                                       const __m128i *f, int taps,
+                                       uint16_t *dst, WritePixels saveFunc,
+                                       int bd) {
+  __m128i s[12];
+  __m128i zero = _mm_setzero_si128();
+  int i = 0;
+  int r = 0;
+
+  // TODO(luoyi) treat s[12] as a circular buffer in width = 2 case
+  if (10 == taps) {
+    i += 1;
+    s[0] = zero;
+  }
+  while (i < 12) {
+    s[i] = _mm_loadu_si128((__m128i const *)(src + r * src_stride));
+    i += 1;
+    r += 1;
+  }
+
+  s[0] = _mm_unpacklo_epi16(s[0], s[1]);
+  s[2] = _mm_unpacklo_epi16(s[2], s[3]);
+  s[4] = _mm_unpacklo_epi16(s[4], s[5]);
+  s[6] = _mm_unpacklo_epi16(s[6], s[7]);
+  s[8] = _mm_unpacklo_epi16(s[8], s[9]);
+  s[10] = _mm_unpacklo_epi16(s[10], s[11]);
+
+  s[0] = _mm_madd_epi16(s[0], f[0]);
+  s[2] = _mm_madd_epi16(s[2], f[1]);
+  s[4] = _mm_madd_epi16(s[4], f[2]);
+  s[6] = _mm_madd_epi16(s[6], f[3]);
+  s[8] = _mm_madd_epi16(s[8], f[4]);
+  s[10] = _mm_madd_epi16(s[10], f[5]);
+
+  s[1] = _mm_min_epi32(s[4], s[6]);
+  s[3] = _mm_max_epi32(s[4], s[6]);
+
+  s[0] = _mm_add_epi32(s[0], s[2]);
+  s[0] = _mm_add_epi32(s[0], s[10]);
+  s[0] = _mm_add_epi32(s[0], s[8]);
+  s[0] = _mm_add_epi32(s[0], s[1]);
+  s[0] = _mm_add_epi32(s[0], s[3]);
+
+  saveFunc(s, bd, dst);
+}
+
+static void highbd_filter_vert_compute_large(const uint16_t *src,
+                                             int src_stride, const __m128i *f,
+                                             int taps, int w, int h,
+                                             uint16_t *dst, int dst_stride,
+                                             int avg, int bd) {
+  int col;
+  int rowIndex = 0;
+  const uint16_t *src_ptr = src;
+  uint16_t *dst_ptr = dst;
+  const int step = 4;
+  WritePixels write4pixels = write4pixelsTab[avg];
+
+  do {
+    for (col = 0; col < w; col += step) {
+      filter_vert_horiz_parallel(src_ptr, src_stride, f, taps, dst_ptr,
+                                 write4pixels, bd);
+      src_ptr += step;
+      dst_ptr += step;
+    }
+    rowIndex++;
+    src_ptr = src + rowIndex * src_stride;
+    dst_ptr = dst + rowIndex * dst_stride;
+  } while (rowIndex < h);
+}
+
+static void highbd_filter_vert_compute_small(const uint16_t *src,
+                                             int src_stride, const __m128i *f,
+                                             int taps, int w, int h,
+                                             uint16_t *dst, int dst_stride,
+                                             int avg, int bd) {
+  int rowIndex = 0;
+  WritePixels write2pixels = write2pixelsTab[avg];
+  (void)w;
+
+  do {
+    filter_vert_horiz_parallel(src, src_stride, f, taps, dst, write2pixels, bd);
+    rowIndex++;
+    src += src_stride;
+    dst += dst_stride;
+  } while (rowIndex < h);
+}
+
+void vp10_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride,
+                                      uint16_t *dst, int dst_stride, int w,
+                                      int h,
+                                      const InterpFilterParams filter_params,
+                                      const int subpel_y_q4, int y_step_q4,
+                                      int avg, int bd) {
+  __m128i verf[6];
+  HbdSubpelFilterCoeffs vCoeffs;
+  const int tapsNum = filter_params.taps;
+
+  if (0 == subpel_y_q4 || 16 != y_step_q4) {
+    vp10_highbd_convolve_vert_c(src, src_stride, dst, dst_stride, w, h,
+                                filter_params, subpel_y_q4, y_step_q4, avg, bd);
+    return;
+  }
+
+  vCoeffs =
+      vp10_hbd_get_subpel_filter_ver_signal_dir(filter_params, subpel_y_q4 - 1);
+  if (!vCoeffs) {
+    vp10_highbd_convolve_vert_c(src, src_stride, dst, dst_stride, w, h,
+                                filter_params, subpel_y_q4, y_step_q4, avg, bd);
+    return;
+  }
+
+  verf[0] = *((const __m128i *)(vCoeffs));
+  verf[1] = *((const __m128i *)(vCoeffs + 1));
+  verf[2] = *((const __m128i *)(vCoeffs + 2));
+  verf[3] = *((const __m128i *)(vCoeffs + 3));
+  verf[4] = *((const __m128i *)(vCoeffs + 4));
+  verf[5] = *((const __m128i *)(vCoeffs + 5));
+
+  src -= src_stride * ((tapsNum >> 1) - 1);
+
+  if (w > 2) {
+    highbd_filter_vert_compute_large(src, src_stride, verf, tapsNum, w, h, dst,
+                                     dst_stride, avg, bd);
+  } else {
+    highbd_filter_vert_compute_small(src, src_stride, verf, tapsNum, w, h, dst,
+                                     dst_stride, avg, bd);
+  }
+}
diff --git a/av1/common/x86/vp10_inv_txfm_sse2.c b/av1/common/x86/vp10_inv_txfm_sse2.c
new file mode 100644
index 0000000..b09933e
--- /dev/null
+++ b/av1/common/x86/vp10_inv_txfm_sse2.c
@@ -0,0 +1,4033 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "av1/common/x86/vp10_inv_txfm_sse2.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+
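+// Add a 4-pixel residual row to the destination, saturating to 8 bits.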
+#define RECON_AND_STORE4X4(dest, in_x)                    \
+  {                                                       \
+    __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \
+    d0 = _mm_unpacklo_epi8(d0, zero);                     \
+    d0 = _mm_add_epi16(in_x, d0);                         \
+    d0 = _mm_packus_epi16(d0, d0);                        \
+    *(int *)(dest) = _mm_cvtsi128_si32(d0);               \
+  }
+
+void vp10_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i eight = _mm_set1_epi16(8);
+  const __m128i cst = _mm_setr_epi16(
+      (int16_t)cospi_16_64, (int16_t)cospi_16_64, (int16_t)cospi_16_64,
+      (int16_t)-cospi_16_64, (int16_t)cospi_24_64, (int16_t)-cospi_8_64,
+      (int16_t)cospi_8_64, (int16_t)cospi_24_64);
+  const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  __m128i input0, input1, input2, input3;
+
+  // Rows
+  input0 = _mm_load_si128((const __m128i *)input);
+  input2 = _mm_load_si128((const __m128i *)(input + 8));
+
+  // Construct i3, i1, i3, i1, i2, i0, i2, i0
+  input0 = _mm_shufflelo_epi16(input0, 0xd8);
+  input0 = _mm_shufflehi_epi16(input0, 0xd8);
+  input2 = _mm_shufflelo_epi16(input2, 0xd8);
+  input2 = _mm_shufflehi_epi16(input2, 0xd8);
+
+  input1 = _mm_unpackhi_epi32(input0, input0);
+  input0 = _mm_unpacklo_epi32(input0, input0);
+  input3 = _mm_unpackhi_epi32(input2, input2);
+  input2 = _mm_unpacklo_epi32(input2, input2);
+
+  // Stage 1
+  input0 = _mm_madd_epi16(input0, cst);
+  input1 = _mm_madd_epi16(input1, cst);
+  input2 = _mm_madd_epi16(input2, cst);
+  input3 = _mm_madd_epi16(input3, cst);
+
+  input0 = _mm_add_epi32(input0, rounding);
+  input1 = _mm_add_epi32(input1, rounding);
+  input2 = _mm_add_epi32(input2, rounding);
+  input3 = _mm_add_epi32(input3, rounding);
+
+  input0 = _mm_srai_epi32(input0, DCT_CONST_BITS);
+  input1 = _mm_srai_epi32(input1, DCT_CONST_BITS);
+  input2 = _mm_srai_epi32(input2, DCT_CONST_BITS);
+  input3 = _mm_srai_epi32(input3, DCT_CONST_BITS);
+
+  // Stage 2
+  input0 = _mm_packs_epi32(input0, input1);
+  input1 = _mm_packs_epi32(input2, input3);
+
+  // Transpose
+  input2 = _mm_unpacklo_epi16(input0, input1);
+  input3 = _mm_unpackhi_epi16(input0, input1);
+  input0 = _mm_unpacklo_epi32(input2, input3);
+  input1 = _mm_unpackhi_epi32(input2, input3);
+
+  // Swap columns 2 and 3, which gives:
+  // input2: columns 1 and 0;  input3: columns 2 and 3.
+  input1 = _mm_shuffle_epi32(input1, 0x4e);
+  input2 = _mm_add_epi16(input0, input1);
+  input3 = _mm_sub_epi16(input0, input1);
+
+  // Columns
+  // Construct i3, i1, i3, i1, i2, i0, i2, i0
+  input0 = _mm_unpacklo_epi32(input2, input2);
+  input1 = _mm_unpackhi_epi32(input2, input2);
+  input2 = _mm_unpackhi_epi32(input3, input3);
+  input3 = _mm_unpacklo_epi32(input3, input3);
+
+  // Stage 1
+  input0 = _mm_madd_epi16(input0, cst);
+  input1 = _mm_madd_epi16(input1, cst);
+  input2 = _mm_madd_epi16(input2, cst);
+  input3 = _mm_madd_epi16(input3, cst);
+
+  input0 = _mm_add_epi32(input0, rounding);
+  input1 = _mm_add_epi32(input1, rounding);
+  input2 = _mm_add_epi32(input2, rounding);
+  input3 = _mm_add_epi32(input3, rounding);
+
+  input0 = _mm_srai_epi32(input0, DCT_CONST_BITS);
+  input1 = _mm_srai_epi32(input1, DCT_CONST_BITS);
+  input2 = _mm_srai_epi32(input2, DCT_CONST_BITS);
+  input3 = _mm_srai_epi32(input3, DCT_CONST_BITS);
+
+  // Stage 2
+  input0 = _mm_packs_epi32(input0, input2);
+  input1 = _mm_packs_epi32(input1, input3);
+
+  // Transpose
+  input2 = _mm_unpacklo_epi16(input0, input1);
+  input3 = _mm_unpackhi_epi16(input0, input1);
+  input0 = _mm_unpacklo_epi32(input2, input3);
+  input1 = _mm_unpackhi_epi32(input2, input3);
+
+  // Swap columns 2 and 3, which gives:
+  // input2: columns 1 and 0;  input3: columns 2 and 3.
+  input1 = _mm_shuffle_epi32(input1, 0x4e);
+  input2 = _mm_add_epi16(input0, input1);
+  input3 = _mm_sub_epi16(input0, input1);
+
+  // Final round and shift
+  input2 = _mm_add_epi16(input2, eight);
+  input3 = _mm_add_epi16(input3, eight);
+
+  input2 = _mm_srai_epi16(input2, 4);
+  input3 = _mm_srai_epi16(input3, 4);
+
+  // Reconstruction and Store
+  {
+    __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest));
+    __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
+    d0 = _mm_unpacklo_epi32(d0,
+                            _mm_cvtsi32_si128(*(const int *)(dest + stride)));
+    d2 = _mm_unpacklo_epi32(
+        _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)), d2);
+    d0 = _mm_unpacklo_epi8(d0, zero);
+    d2 = _mm_unpacklo_epi8(d2, zero);
+    d0 = _mm_add_epi16(d0, input2);
+    d2 = _mm_add_epi16(d2, input3);
+    d0 = _mm_packus_epi16(d0, d2);
+    // store input0
+    *(int *)dest = _mm_cvtsi128_si32(d0);
+    // store input1
+    d0 = _mm_srli_si128(d0, 4);
+    *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
+    // store input2
+    d0 = _mm_srli_si128(d0, 4);
+    *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
+    // store input3
+    d0 = _mm_srli_si128(d0, 4);
+    *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
+  }
+}
+
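+// DC-only shortcut: both 1-D passes scale the single coefficient by
+// cospi_16_64; after the final ROUND_POWER_OF_TWO(., 4) the value is
+// broadcast and added to every pixel of the 4x4 block.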
+void vp10_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+  __m128i dc_value;
+  const __m128i zero = _mm_setzero_si128();
+  int a;
+
+  a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+  a = (int)dct_const_round_shift(a * cospi_16_64);
+  a = ROUND_POWER_OF_TWO(a, 4);
+
+  dc_value = _mm_set1_epi16(a);
+
+  RECON_AND_STORE4X4(dest + 0 * stride, dc_value);
+  RECON_AND_STORE4X4(dest + 1 * stride, dc_value);
+  RECON_AND_STORE4X4(dest + 2 * stride, dc_value);
+  RECON_AND_STORE4X4(dest + 3 * stride, dc_value);
+}
+
+static INLINE void transpose_4x4(__m128i *res) {
+  const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]);
+  const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]);
+
+  res[0] = _mm_unpacklo_epi16(tr0_0, tr0_1);
+  res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1);
+}
+
+void vp10_idct4_sse2(__m128i *in) {
+  const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64);
+  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+  const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  __m128i u[8], v[8];
+
+  transpose_4x4(in);
+  // stage 1
+  u[0] = _mm_unpacklo_epi16(in[0], in[1]);
+  u[1] = _mm_unpackhi_epi16(in[0], in[1]);
+  v[0] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+  v[1] = _mm_madd_epi16(u[0], k__cospi_p16_m16);
+  v[2] = _mm_madd_epi16(u[1], k__cospi_p24_m08);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p08_p24);
+
+  u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+  u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+  u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+  u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+
+  v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+  v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+  v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+  v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+
+  u[0] = _mm_packs_epi32(v[0], v[1]);
+  u[1] = _mm_packs_epi32(v[3], v[2]);
+
+  // stage 2
+  in[0] = _mm_add_epi16(u[0], u[1]);
+  in[1] = _mm_sub_epi16(u[0], u[1]);
+  in[1] = _mm_shuffle_epi32(in[1], 0x4E);
+}
+
+void vp10_iadst4_sse2(__m128i *in) {
+  const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9);
+  const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9);
+  const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9);
+  const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9);
+  const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9);
+  const __m128i kZero = _mm_set1_epi16(0);
+  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  __m128i u[8], v[8], in7;
+
+  transpose_4x4(in);
+  in7 = _mm_srli_si128(in[1], 8);
+  in7 = _mm_add_epi16(in7, in[0]);
+  in7 = _mm_sub_epi16(in7, in[1]);
+
+  u[0] = _mm_unpacklo_epi16(in[0], in[1]);
+  u[1] = _mm_unpackhi_epi16(in[0], in[1]);
+  u[2] = _mm_unpacklo_epi16(in7, kZero);
+  u[3] = _mm_unpackhi_epi16(in[0], kZero);
+
+  v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p04);  // s0 + s3
+  v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p02);  // s2 + s5
+  v[2] = _mm_madd_epi16(u[2], k__sinpi_p03_p03);  // x2
+  v[3] = _mm_madd_epi16(u[0], k__sinpi_p02_m01);  // s1 - s4
+  v[4] = _mm_madd_epi16(u[1], k__sinpi_p03_m04);  // s2 - s6
+  v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03);  // s2
+
+  u[0] = _mm_add_epi32(v[0], v[1]);
+  u[1] = _mm_add_epi32(v[3], v[4]);
+  u[2] = v[2];
+  u[3] = _mm_add_epi32(u[0], u[1]);
+  u[4] = _mm_slli_epi32(v[5], 2);
+  u[5] = _mm_add_epi32(u[3], v[5]);
+  u[6] = _mm_sub_epi32(u[5], u[4]);
+
+  v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+  v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+  v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+  v[3] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+
+  u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+  u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+  u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+  u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+
+  in[0] = _mm_packs_epi32(u[0], u[1]);
+  in[1] = _mm_packs_epi32(u[2], u[3]);
+}
+
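+// Full 8x8 transpose of 16-bit elements via unpack lo/hi ladders.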
+#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
+                      out2, out3, out4, out5, out6, out7)                 \
+  {                                                                       \
+    const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1);                   \
+    const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3);                   \
+    const __m128i tr0_2 = _mm_unpackhi_epi16(in0, in1);                   \
+    const __m128i tr0_3 = _mm_unpackhi_epi16(in2, in3);                   \
+    const __m128i tr0_4 = _mm_unpacklo_epi16(in4, in5);                   \
+    const __m128i tr0_5 = _mm_unpacklo_epi16(in6, in7);                   \
+    const __m128i tr0_6 = _mm_unpackhi_epi16(in4, in5);                   \
+    const __m128i tr0_7 = _mm_unpackhi_epi16(in6, in7);                   \
+                                                                          \
+    const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);               \
+    const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);               \
+    const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);               \
+    const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);               \
+    const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);               \
+    const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);               \
+    const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);               \
+    const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);               \
+                                                                          \
+    out0 = _mm_unpacklo_epi64(tr1_0, tr1_4);                              \
+    out1 = _mm_unpackhi_epi64(tr1_0, tr1_4);                              \
+    out2 = _mm_unpacklo_epi64(tr1_2, tr1_6);                              \
+    out3 = _mm_unpackhi_epi64(tr1_2, tr1_6);                              \
+    out4 = _mm_unpacklo_epi64(tr1_1, tr1_5);                              \
+    out5 = _mm_unpackhi_epi64(tr1_1, tr1_5);                              \
+    out6 = _mm_unpacklo_epi64(tr1_3, tr1_7);                              \
+    out7 = _mm_unpackhi_epi64(tr1_3, tr1_7);                              \
+  }
+
+#define TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, out0, out1, out2, out3) \
+  {                                                                      \
+    const __m128i tr0_0 = _mm_unpackhi_epi16(tmp0, tmp1);                \
+    const __m128i tr0_1 = _mm_unpacklo_epi16(tmp1, tmp0);                \
+    const __m128i tr0_4 = _mm_unpacklo_epi16(tmp2, tmp3);                \
+    const __m128i tr0_5 = _mm_unpackhi_epi16(tmp3, tmp2);                \
+                                                                         \
+    const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);              \
+    const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);              \
+    const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);              \
+    const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);              \
+                                                                         \
+    out0 = _mm_unpacklo_epi64(tr1_0, tr1_4);                             \
+    out1 = _mm_unpackhi_epi64(tr1_0, tr1_4);                             \
+    out2 = _mm_unpacklo_epi64(tr1_2, tr1_6);                             \
+    out3 = _mm_unpackhi_epi64(tr1_2, tr1_6);                             \
+  }
+
+#define TRANSPOSE_8X8_10(in0, in1, in2, in3, out0, out1) \
+  {                                                      \
+    const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1);  \
+    const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3);  \
+    out0 = _mm_unpacklo_epi32(tr0_0, tr0_1);             \
+    out1 = _mm_unpackhi_epi32(tr0_0, tr0_1);             \
+  }
+
+// Multiply element pairs by constants and accumulate the products, rounding
+// and shifting the results back down to 16 bits.
+#define MULTIPLICATION_AND_ADD(lo_0, hi_0, lo_1, hi_1, cst0, cst1, cst2, cst3, \
+                               res0, res1, res2, res3)                         \
+  {                                                                            \
+    tmp0 = _mm_madd_epi16(lo_0, cst0);                                         \
+    tmp1 = _mm_madd_epi16(hi_0, cst0);                                         \
+    tmp2 = _mm_madd_epi16(lo_0, cst1);                                         \
+    tmp3 = _mm_madd_epi16(hi_0, cst1);                                         \
+    tmp4 = _mm_madd_epi16(lo_1, cst2);                                         \
+    tmp5 = _mm_madd_epi16(hi_1, cst2);                                         \
+    tmp6 = _mm_madd_epi16(lo_1, cst3);                                         \
+    tmp7 = _mm_madd_epi16(hi_1, cst3);                                         \
+                                                                               \
+    tmp0 = _mm_add_epi32(tmp0, rounding);                                      \
+    tmp1 = _mm_add_epi32(tmp1, rounding);                                      \
+    tmp2 = _mm_add_epi32(tmp2, rounding);                                      \
+    tmp3 = _mm_add_epi32(tmp3, rounding);                                      \
+    tmp4 = _mm_add_epi32(tmp4, rounding);                                      \
+    tmp5 = _mm_add_epi32(tmp5, rounding);                                      \
+    tmp6 = _mm_add_epi32(tmp6, rounding);                                      \
+    tmp7 = _mm_add_epi32(tmp7, rounding);                                      \
+                                                                               \
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);                               \
+    tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);                               \
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);                               \
+    tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);                               \
+    tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);                               \
+    tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS);                               \
+    tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);                               \
+    tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS);                               \
+                                                                               \
+    res0 = _mm_packs_epi32(tmp0, tmp1);                                        \
+    res1 = _mm_packs_epi32(tmp2, tmp3);                                        \
+    res2 = _mm_packs_epi32(tmp4, tmp5);                                        \
+    res3 = _mm_packs_epi32(tmp6, tmp7);                                        \
+  }
+
+#define MULTIPLICATION_AND_ADD_2(lo_0, hi_0, cst0, cst1, res0, res1) \
+  {                                                                  \
+    tmp0 = _mm_madd_epi16(lo_0, cst0);                               \
+    tmp1 = _mm_madd_epi16(hi_0, cst0);                               \
+    tmp2 = _mm_madd_epi16(lo_0, cst1);                               \
+    tmp3 = _mm_madd_epi16(hi_0, cst1);                               \
+                                                                     \
+    tmp0 = _mm_add_epi32(tmp0, rounding);                            \
+    tmp1 = _mm_add_epi32(tmp1, rounding);                            \
+    tmp2 = _mm_add_epi32(tmp2, rounding);                            \
+    tmp3 = _mm_add_epi32(tmp3, rounding);                            \
+                                                                     \
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);                     \
+    tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);                     \
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);                     \
+    tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);                     \
+                                                                     \
+    res0 = _mm_packs_epi32(tmp0, tmp1);                              \
+    res1 = _mm_packs_epi32(tmp2, tmp3);                              \
+  }
+
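+// Four-stage 1-D 8-point IDCT over eight 8-wide rows held in registers.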
+#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, \
+              out4, out5, out6, out7)                                         \
+  {                                                                           \
+    /* Stage1 */                                                              \
+    {                                                                         \
+      const __m128i lo_17 = _mm_unpacklo_epi16(in1, in7);                     \
+      const __m128i hi_17 = _mm_unpackhi_epi16(in1, in7);                     \
+      const __m128i lo_35 = _mm_unpacklo_epi16(in3, in5);                     \
+      const __m128i hi_35 = _mm_unpackhi_epi16(in3, in5);                     \
+                                                                              \
+      MULTIPLICATION_AND_ADD(lo_17, hi_17, lo_35, hi_35, stg1_0, stg1_1,      \
+                             stg1_2, stg1_3, stp1_4, stp1_7, stp1_5, stp1_6)  \
+    }                                                                         \
+                                                                              \
+    /* Stage2 */                                                              \
+    {                                                                         \
+      const __m128i lo_04 = _mm_unpacklo_epi16(in0, in4);                     \
+      const __m128i hi_04 = _mm_unpackhi_epi16(in0, in4);                     \
+      const __m128i lo_26 = _mm_unpacklo_epi16(in2, in6);                     \
+      const __m128i hi_26 = _mm_unpackhi_epi16(in2, in6);                     \
+                                                                              \
+      MULTIPLICATION_AND_ADD(lo_04, hi_04, lo_26, hi_26, stg2_0, stg2_1,      \
+                             stg2_2, stg2_3, stp2_0, stp2_1, stp2_2, stp2_3)  \
+                                                                              \
+      stp2_4 = _mm_adds_epi16(stp1_4, stp1_5);                                \
+      stp2_5 = _mm_subs_epi16(stp1_4, stp1_5);                                \
+      stp2_6 = _mm_subs_epi16(stp1_7, stp1_6);                                \
+      stp2_7 = _mm_adds_epi16(stp1_7, stp1_6);                                \
+    }                                                                         \
+                                                                              \
+    /* Stage3 */                                                              \
+    {                                                                         \
+      const __m128i lo_56 = _mm_unpacklo_epi16(stp2_6, stp2_5);               \
+      const __m128i hi_56 = _mm_unpackhi_epi16(stp2_6, stp2_5);               \
+                                                                              \
+      stp1_0 = _mm_adds_epi16(stp2_0, stp2_3);                                \
+      stp1_1 = _mm_adds_epi16(stp2_1, stp2_2);                                \
+      stp1_2 = _mm_subs_epi16(stp2_1, stp2_2);                                \
+      stp1_3 = _mm_subs_epi16(stp2_0, stp2_3);                                \
+                                                                              \
+      tmp0 = _mm_madd_epi16(lo_56, stg2_1);                                   \
+      tmp1 = _mm_madd_epi16(hi_56, stg2_1);                                   \
+      tmp2 = _mm_madd_epi16(lo_56, stg2_0);                                   \
+      tmp3 = _mm_madd_epi16(hi_56, stg2_0);                                   \
+                                                                              \
+      tmp0 = _mm_add_epi32(tmp0, rounding);                                   \
+      tmp1 = _mm_add_epi32(tmp1, rounding);                                   \
+      tmp2 = _mm_add_epi32(tmp2, rounding);                                   \
+      tmp3 = _mm_add_epi32(tmp3, rounding);                                   \
+                                                                              \
+      tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);                            \
+      tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);                            \
+      tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);                            \
+      tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);                            \
+                                                                              \
+      stp1_5 = _mm_packs_epi32(tmp0, tmp1);                                   \
+      stp1_6 = _mm_packs_epi32(tmp2, tmp3);                                   \
+    }                                                                         \
+                                                                              \
+    /* Stage4  */                                                             \
+    out0 = _mm_adds_epi16(stp1_0, stp2_7);                                    \
+    out1 = _mm_adds_epi16(stp1_1, stp1_6);                                    \
+    out2 = _mm_adds_epi16(stp1_2, stp1_5);                                    \
+    out3 = _mm_adds_epi16(stp1_3, stp2_4);                                    \
+    out4 = _mm_subs_epi16(stp1_3, stp2_4);                                    \
+    out5 = _mm_subs_epi16(stp1_2, stp1_5);                                    \
+    out6 = _mm_subs_epi16(stp1_1, stp1_6);                                    \
+    out7 = _mm_subs_epi16(stp1_0, stp2_7);                                    \
+  }
+
+void vp10_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  const __m128i final_rounding = _mm_set1_epi16(1 << 4);
+  const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+  const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+  const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+  const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64);
+  const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+  const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+  const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+
+  __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+  __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7;
+  __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;
+  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int i;
+
+  // Load input data.
+  in0 = _mm_load_si128((const __m128i *)input);
+  in1 = _mm_load_si128((const __m128i *)(input + 8 * 1));
+  in2 = _mm_load_si128((const __m128i *)(input + 8 * 2));
+  in3 = _mm_load_si128((const __m128i *)(input + 8 * 3));
+  in4 = _mm_load_si128((const __m128i *)(input + 8 * 4));
+  in5 = _mm_load_si128((const __m128i *)(input + 8 * 5));
+  in6 = _mm_load_si128((const __m128i *)(input + 8 * 6));
+  in7 = _mm_load_si128((const __m128i *)(input + 8 * 7));
+
+  // 2-D
+  for (i = 0; i < 2; i++) {
+    // 8x8 Transpose is copied from vp10_fdct8x8_sse2()
+    TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+                  in4, in5, in6, in7);
+
+    // 4-stage 1D vp10_idct8x8
+    IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5,
+          in6, in7);
+  }
+
+  // Final rounding and shift
+  in0 = _mm_adds_epi16(in0, final_rounding);
+  in1 = _mm_adds_epi16(in1, final_rounding);
+  in2 = _mm_adds_epi16(in2, final_rounding);
+  in3 = _mm_adds_epi16(in3, final_rounding);
+  in4 = _mm_adds_epi16(in4, final_rounding);
+  in5 = _mm_adds_epi16(in5, final_rounding);
+  in6 = _mm_adds_epi16(in6, final_rounding);
+  in7 = _mm_adds_epi16(in7, final_rounding);
+
+  in0 = _mm_srai_epi16(in0, 5);
+  in1 = _mm_srai_epi16(in1, 5);
+  in2 = _mm_srai_epi16(in2, 5);
+  in3 = _mm_srai_epi16(in3, 5);
+  in4 = _mm_srai_epi16(in4, 5);
+  in5 = _mm_srai_epi16(in5, 5);
+  in6 = _mm_srai_epi16(in6, 5);
+  in7 = _mm_srai_epi16(in7, 5);
+
+  RECON_AND_STORE(dest + 0 * stride, in0);
+  RECON_AND_STORE(dest + 1 * stride, in1);
+  RECON_AND_STORE(dest + 2 * stride, in2);
+  RECON_AND_STORE(dest + 3 * stride, in3);
+  RECON_AND_STORE(dest + 4 * stride, in4);
+  RECON_AND_STORE(dest + 5 * stride, in5);
+  RECON_AND_STORE(dest + 6 * stride, in6);
+  RECON_AND_STORE(dest + 7 * stride, in7);
+}
+
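+// DC-only shortcut: with a single non-zero coefficient the 2-D transform
+// collapses to two scalar multiplies by cospi_16_64 (one per 1-D pass) plus
+// the final (x + 16) >> 5 rounding, and the result is splatted across all 64
+// pixels. Worked example, assuming cospi_16_64 == 11585 and
+// dct_const_round_shift(x) == (x + 8192) >> 14: input[0] == 64 gives a == 45,
+// then a == 32, then ROUND_POWER_OF_TWO(32, 5) == 1, i.e. every pixel of the
+// 8x8 block is brightened by 1.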
+void vp10_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+  __m128i dc_value;
+  const __m128i zero = _mm_setzero_si128();
+  int a;
+
+  a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+  a = (int)dct_const_round_shift(a * cospi_16_64);
+  a = ROUND_POWER_OF_TWO(a, 5);
+
+  dc_value = _mm_set1_epi16(a);
+
+  RECON_AND_STORE(dest + 0 * stride, dc_value);
+  RECON_AND_STORE(dest + 1 * stride, dc_value);
+  RECON_AND_STORE(dest + 2 * stride, dc_value);
+  RECON_AND_STORE(dest + 3 * stride, dc_value);
+  RECON_AND_STORE(dest + 4 * stride, dc_value);
+  RECON_AND_STORE(dest + 5 * stride, dc_value);
+  RECON_AND_STORE(dest + 6 * stride, dc_value);
+  RECON_AND_STORE(dest + 7 * stride, dc_value);
+}
+
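+// Single 1-D 8-point IDCT pass over the eight rows held in in[0..7]: the
+// transpose turns rows into columns so the butterfly operates lane-wise, and
+// the result is written back to in[]. Presumably paired with
+// vp10_iadst8_sse2 by the hybrid (DCT/ADST) transform wrappers.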
+void vp10_idct8_sse2(__m128i *in) {
+  const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+  const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+  const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+  const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64);
+  const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+  const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+  const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+
+  __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+  __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7;
+  __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;
+  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+
+  // 8x8 Transpose is copied from vp10_fdct8x8_sse2()
+  TRANSPOSE_8X8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], in0,
+                in1, in2, in3, in4, in5, in6, in7);
+
+  // 4-stage 1D vp10_idct8x8
+  IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in[0], in[1], in[2], in[3],
+        in[4], in[5], in[6], in[7]);
+}
+
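+// Single 1-D 8-point inverse ADST pass over in[0..7], the ADST counterpart
+// of vp10_idct8_sse2. Stage 1 mixes all eight inputs with the odd cospi
+// pairs, stage 2 passes the first half through add/sub and rotates the
+// second half by the cospi_8/cospi_24 pairs, stage 3 applies the cospi_16
+// rotations, and the outputs at odd indices are negated.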
+void vp10_iadst8_sse2(__m128i *in) {
+  const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
+  const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+  const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
+  const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+  const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64);
+  const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+  const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64);
+  const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+  const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+  const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
+  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+  const __m128i k__const_0 = _mm_set1_epi16(0);
+  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+
+  __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15;
+  __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15;
+  __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
+  __m128i s0, s1, s2, s3, s4, s5, s6, s7;
+  __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+
+  // transpose
+  array_transpose_8x8(in, in);
+
+  // Re-order the rows into the input order expected by the 8-point ADST
+  // butterfly (x0 = in[7], x1 = in[0], x2 = in[5], ..., x7 = in[6]).
+  in0 = in[7];
+  in1 = in[0];
+  in2 = in[5];
+  in3 = in[2];
+  in4 = in[3];
+  in5 = in[4];
+  in6 = in[1];
+  in7 = in[6];
+
+  // column transformation
+  // stage 1
+  // interleave 16-bit pairs and multiply/add into 32-bit integers
+  s0 = _mm_unpacklo_epi16(in0, in1);
+  s1 = _mm_unpackhi_epi16(in0, in1);
+  s2 = _mm_unpacklo_epi16(in2, in3);
+  s3 = _mm_unpackhi_epi16(in2, in3);
+  s4 = _mm_unpacklo_epi16(in4, in5);
+  s5 = _mm_unpackhi_epi16(in4, in5);
+  s6 = _mm_unpacklo_epi16(in6, in7);
+  s7 = _mm_unpackhi_epi16(in6, in7);
+
+  u0 = _mm_madd_epi16(s0, k__cospi_p02_p30);
+  u1 = _mm_madd_epi16(s1, k__cospi_p02_p30);
+  u2 = _mm_madd_epi16(s0, k__cospi_p30_m02);
+  u3 = _mm_madd_epi16(s1, k__cospi_p30_m02);
+  u4 = _mm_madd_epi16(s2, k__cospi_p10_p22);
+  u5 = _mm_madd_epi16(s3, k__cospi_p10_p22);
+  u6 = _mm_madd_epi16(s2, k__cospi_p22_m10);
+  u7 = _mm_madd_epi16(s3, k__cospi_p22_m10);
+  u8 = _mm_madd_epi16(s4, k__cospi_p18_p14);
+  u9 = _mm_madd_epi16(s5, k__cospi_p18_p14);
+  u10 = _mm_madd_epi16(s4, k__cospi_p14_m18);
+  u11 = _mm_madd_epi16(s5, k__cospi_p14_m18);
+  u12 = _mm_madd_epi16(s6, k__cospi_p26_p06);
+  u13 = _mm_madd_epi16(s7, k__cospi_p26_p06);
+  u14 = _mm_madd_epi16(s6, k__cospi_p06_m26);
+  u15 = _mm_madd_epi16(s7, k__cospi_p06_m26);
+
+  // addition
+  w0 = _mm_add_epi32(u0, u8);
+  w1 = _mm_add_epi32(u1, u9);
+  w2 = _mm_add_epi32(u2, u10);
+  w3 = _mm_add_epi32(u3, u11);
+  w4 = _mm_add_epi32(u4, u12);
+  w5 = _mm_add_epi32(u5, u13);
+  w6 = _mm_add_epi32(u6, u14);
+  w7 = _mm_add_epi32(u7, u15);
+  w8 = _mm_sub_epi32(u0, u8);
+  w9 = _mm_sub_epi32(u1, u9);
+  w10 = _mm_sub_epi32(u2, u10);
+  w11 = _mm_sub_epi32(u3, u11);
+  w12 = _mm_sub_epi32(u4, u12);
+  w13 = _mm_sub_epi32(u5, u13);
+  w14 = _mm_sub_epi32(u6, u14);
+  w15 = _mm_sub_epi32(u7, u15);
+
+  // shift and rounding
+  v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
+  v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
+  v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
+  v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
+  v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
+  v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
+  v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
+  v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
+  v8 = _mm_add_epi32(w8, k__DCT_CONST_ROUNDING);
+  v9 = _mm_add_epi32(w9, k__DCT_CONST_ROUNDING);
+  v10 = _mm_add_epi32(w10, k__DCT_CONST_ROUNDING);
+  v11 = _mm_add_epi32(w11, k__DCT_CONST_ROUNDING);
+  v12 = _mm_add_epi32(w12, k__DCT_CONST_ROUNDING);
+  v13 = _mm_add_epi32(w13, k__DCT_CONST_ROUNDING);
+  v14 = _mm_add_epi32(w14, k__DCT_CONST_ROUNDING);
+  v15 = _mm_add_epi32(w15, k__DCT_CONST_ROUNDING);
+
+  u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+  u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+  u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+  u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+  u4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+  u5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+  u6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+  u7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+  u8 = _mm_srai_epi32(v8, DCT_CONST_BITS);
+  u9 = _mm_srai_epi32(v9, DCT_CONST_BITS);
+  u10 = _mm_srai_epi32(v10, DCT_CONST_BITS);
+  u11 = _mm_srai_epi32(v11, DCT_CONST_BITS);
+  u12 = _mm_srai_epi32(v12, DCT_CONST_BITS);
+  u13 = _mm_srai_epi32(v13, DCT_CONST_BITS);
+  u14 = _mm_srai_epi32(v14, DCT_CONST_BITS);
+  u15 = _mm_srai_epi32(v15, DCT_CONST_BITS);
+
+  // back to 16-bit and pack 8 integers into __m128i
+  in[0] = _mm_packs_epi32(u0, u1);
+  in[1] = _mm_packs_epi32(u2, u3);
+  in[2] = _mm_packs_epi32(u4, u5);
+  in[3] = _mm_packs_epi32(u6, u7);
+  in[4] = _mm_packs_epi32(u8, u9);
+  in[5] = _mm_packs_epi32(u10, u11);
+  in[6] = _mm_packs_epi32(u12, u13);
+  in[7] = _mm_packs_epi32(u14, u15);
+
+  // stage 2
+  s0 = _mm_add_epi16(in[0], in[2]);
+  s1 = _mm_add_epi16(in[1], in[3]);
+  s2 = _mm_sub_epi16(in[0], in[2]);
+  s3 = _mm_sub_epi16(in[1], in[3]);
+  u0 = _mm_unpacklo_epi16(in[4], in[5]);
+  u1 = _mm_unpackhi_epi16(in[4], in[5]);
+  u2 = _mm_unpacklo_epi16(in[6], in[7]);
+  u3 = _mm_unpackhi_epi16(in[6], in[7]);
+
+  v0 = _mm_madd_epi16(u0, k__cospi_p08_p24);
+  v1 = _mm_madd_epi16(u1, k__cospi_p08_p24);
+  v2 = _mm_madd_epi16(u0, k__cospi_p24_m08);
+  v3 = _mm_madd_epi16(u1, k__cospi_p24_m08);
+  v4 = _mm_madd_epi16(u2, k__cospi_m24_p08);
+  v5 = _mm_madd_epi16(u3, k__cospi_m24_p08);
+  v6 = _mm_madd_epi16(u2, k__cospi_p08_p24);
+  v7 = _mm_madd_epi16(u3, k__cospi_p08_p24);
+
+  w0 = _mm_add_epi32(v0, v4);
+  w1 = _mm_add_epi32(v1, v5);
+  w2 = _mm_add_epi32(v2, v6);
+  w3 = _mm_add_epi32(v3, v7);
+  w4 = _mm_sub_epi32(v0, v4);
+  w5 = _mm_sub_epi32(v1, v5);
+  w6 = _mm_sub_epi32(v2, v6);
+  w7 = _mm_sub_epi32(v3, v7);
+
+  v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
+  v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
+  v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
+  v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
+  v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
+  v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
+  v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
+  v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
+
+  u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+  u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+  u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+  u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+  u4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+  u5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+  u6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+  u7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+
+  // back to 16-bit integers
+  s4 = _mm_packs_epi32(u0, u1);
+  s5 = _mm_packs_epi32(u2, u3);
+  s6 = _mm_packs_epi32(u4, u5);
+  s7 = _mm_packs_epi32(u6, u7);
+
+  // stage 3
+  u0 = _mm_unpacklo_epi16(s2, s3);
+  u1 = _mm_unpackhi_epi16(s2, s3);
+  u2 = _mm_unpacklo_epi16(s6, s7);
+  u3 = _mm_unpackhi_epi16(s6, s7);
+
+  v0 = _mm_madd_epi16(u0, k__cospi_p16_p16);
+  v1 = _mm_madd_epi16(u1, k__cospi_p16_p16);
+  v2 = _mm_madd_epi16(u0, k__cospi_p16_m16);
+  v3 = _mm_madd_epi16(u1, k__cospi_p16_m16);
+  v4 = _mm_madd_epi16(u2, k__cospi_p16_p16);
+  v5 = _mm_madd_epi16(u3, k__cospi_p16_p16);
+  v6 = _mm_madd_epi16(u2, k__cospi_p16_m16);
+  v7 = _mm_madd_epi16(u3, k__cospi_p16_m16);
+
+  u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING);
+  u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING);
+  u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING);
+  u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING);
+  u4 = _mm_add_epi32(v4, k__DCT_CONST_ROUNDING);
+  u5 = _mm_add_epi32(v5, k__DCT_CONST_ROUNDING);
+  u6 = _mm_add_epi32(v6, k__DCT_CONST_ROUNDING);
+  u7 = _mm_add_epi32(v7, k__DCT_CONST_ROUNDING);
+
+  v0 = _mm_srai_epi32(u0, DCT_CONST_BITS);
+  v1 = _mm_srai_epi32(u1, DCT_CONST_BITS);
+  v2 = _mm_srai_epi32(u2, DCT_CONST_BITS);
+  v3 = _mm_srai_epi32(u3, DCT_CONST_BITS);
+  v4 = _mm_srai_epi32(u4, DCT_CONST_BITS);
+  v5 = _mm_srai_epi32(u5, DCT_CONST_BITS);
+  v6 = _mm_srai_epi32(u6, DCT_CONST_BITS);
+  v7 = _mm_srai_epi32(u7, DCT_CONST_BITS);
+
+  s2 = _mm_packs_epi32(v0, v1);
+  s3 = _mm_packs_epi32(v2, v3);
+  s6 = _mm_packs_epi32(v4, v5);
+  s7 = _mm_packs_epi32(v6, v7);
+
+  in[0] = s0;
+  in[1] = _mm_sub_epi16(k__const_0, s4);
+  in[2] = s6;
+  in[3] = _mm_sub_epi16(k__const_0, s2);
+  in[4] = s3;
+  in[5] = _mm_sub_epi16(k__const_0, s7);
+  in[6] = s5;
+  in[7] = _mm_sub_epi16(k__const_0, s1);
+}
+
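+// Reduced 8x8 inverse DCT for sparse blocks: only the top-left 4x4 quadrant
+// of coefficients is assumed non-zero (presumably the eob <= 12 case, hence
+// the _12 suffix), so the row pass works on a packed 4x4 and only the column
+// pass runs a full IDCT8, with its four high inputs zero.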
+void vp10_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  const __m128i final_rounding = _mm_set1_epi16(1 << 4);
+  const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+  const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+  const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+  const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64);
+  const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+  const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+  const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+  const __m128i stg3_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+  __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+  __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7;
+  __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;
+  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+
+  // Rows. Load 4-row input data.
+  in0 = _mm_load_si128((const __m128i *)input);
+  in1 = _mm_load_si128((const __m128i *)(input + 8 * 1));
+  in2 = _mm_load_si128((const __m128i *)(input + 8 * 2));
+  in3 = _mm_load_si128((const __m128i *)(input + 8 * 3));
+
+  // 8x4 Transpose
+  TRANSPOSE_8X8_10(in0, in1, in2, in3, in0, in1);
+  // Stage1
+  {
+    const __m128i lo_17 = _mm_unpackhi_epi16(in0, zero);
+    const __m128i lo_35 = _mm_unpackhi_epi16(in1, zero);
+
+    tmp0 = _mm_madd_epi16(lo_17, stg1_0);
+    tmp2 = _mm_madd_epi16(lo_17, stg1_1);
+    tmp4 = _mm_madd_epi16(lo_35, stg1_2);
+    tmp6 = _mm_madd_epi16(lo_35, stg1_3);
+
+    tmp0 = _mm_add_epi32(tmp0, rounding);
+    tmp2 = _mm_add_epi32(tmp2, rounding);
+    tmp4 = _mm_add_epi32(tmp4, rounding);
+    tmp6 = _mm_add_epi32(tmp6, rounding);
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+    tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+    tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+
+    stp1_4 = _mm_packs_epi32(tmp0, tmp2);
+    stp1_5 = _mm_packs_epi32(tmp4, tmp6);
+  }
+
+  // Stage2
+  {
+    const __m128i lo_04 = _mm_unpacklo_epi16(in0, zero);
+    const __m128i lo_26 = _mm_unpacklo_epi16(in1, zero);
+
+    tmp0 = _mm_madd_epi16(lo_04, stg2_0);
+    tmp2 = _mm_madd_epi16(lo_04, stg2_1);
+    tmp4 = _mm_madd_epi16(lo_26, stg2_2);
+    tmp6 = _mm_madd_epi16(lo_26, stg2_3);
+
+    tmp0 = _mm_add_epi32(tmp0, rounding);
+    tmp2 = _mm_add_epi32(tmp2, rounding);
+    tmp4 = _mm_add_epi32(tmp4, rounding);
+    tmp6 = _mm_add_epi32(tmp6, rounding);
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+    tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+    tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+
+    stp2_0 = _mm_packs_epi32(tmp0, tmp2);
+    stp2_2 = _mm_packs_epi32(tmp6, tmp4);
+
+    tmp0 = _mm_adds_epi16(stp1_4, stp1_5);
+    tmp1 = _mm_subs_epi16(stp1_4, stp1_5);
+
+    stp2_4 = tmp0;
+    stp2_5 = _mm_unpacklo_epi64(tmp1, zero);
+    stp2_6 = _mm_unpackhi_epi64(tmp1, zero);
+  }
+
+  // Stage3
+  {
+    const __m128i lo_56 = _mm_unpacklo_epi16(stp2_5, stp2_6);
+
+    tmp4 = _mm_adds_epi16(stp2_0, stp2_2);
+    tmp6 = _mm_subs_epi16(stp2_0, stp2_2);
+
+    stp1_2 = _mm_unpackhi_epi64(tmp6, tmp4);
+    stp1_3 = _mm_unpacklo_epi64(tmp6, tmp4);
+
+    tmp0 = _mm_madd_epi16(lo_56, stg3_0);
+    tmp2 = _mm_madd_epi16(lo_56, stg2_0);  // stg3_1 = stg2_0
+
+    tmp0 = _mm_add_epi32(tmp0, rounding);
+    tmp2 = _mm_add_epi32(tmp2, rounding);
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+
+    stp1_5 = _mm_packs_epi32(tmp0, tmp2);
+  }
+
+  // Stage4
+  tmp0 = _mm_adds_epi16(stp1_3, stp2_4);
+  tmp1 = _mm_adds_epi16(stp1_2, stp1_5);
+  tmp2 = _mm_subs_epi16(stp1_3, stp2_4);
+  tmp3 = _mm_subs_epi16(stp1_2, stp1_5);
+
+  TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3)
+
+  IDCT8(in0, in1, in2, in3, zero, zero, zero, zero, in0, in1, in2, in3, in4,
+        in5, in6, in7);
+  // Final rounding and shift
+  in0 = _mm_adds_epi16(in0, final_rounding);
+  in1 = _mm_adds_epi16(in1, final_rounding);
+  in2 = _mm_adds_epi16(in2, final_rounding);
+  in3 = _mm_adds_epi16(in3, final_rounding);
+  in4 = _mm_adds_epi16(in4, final_rounding);
+  in5 = _mm_adds_epi16(in5, final_rounding);
+  in6 = _mm_adds_epi16(in6, final_rounding);
+  in7 = _mm_adds_epi16(in7, final_rounding);
+
+  in0 = _mm_srai_epi16(in0, 5);
+  in1 = _mm_srai_epi16(in1, 5);
+  in2 = _mm_srai_epi16(in2, 5);
+  in3 = _mm_srai_epi16(in3, 5);
+  in4 = _mm_srai_epi16(in4, 5);
+  in5 = _mm_srai_epi16(in5, 5);
+  in6 = _mm_srai_epi16(in6, 5);
+  in7 = _mm_srai_epi16(in7, 5);
+
+  RECON_AND_STORE(dest + 0 * stride, in0);
+  RECON_AND_STORE(dest + 1 * stride, in1);
+  RECON_AND_STORE(dest + 2 * stride, in2);
+  RECON_AND_STORE(dest + 3 * stride, in3);
+  RECON_AND_STORE(dest + 4 * stride, in4);
+  RECON_AND_STORE(dest + 5 * stride, in5);
+  RECON_AND_STORE(dest + 6 * stride, in6);
+  RECON_AND_STORE(dest + 7 * stride, in7);
+}
+
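+// Stages 2-6 of the 16-point IDCT butterfly over the sixteen registers
+// in[0..15]; stage 1 is a pure re-ordering of the inputs and is folded into
+// the operand indexing, and the final stage-7 add/sub is left to the caller
+// so the same macro serves both the row and the column pass.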
+#define IDCT16                                                                 \
+  /* Stage2 */                                                                 \
+  {                                                                            \
+    const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]);                 \
+    const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], in[15]);                 \
+    const __m128i lo_9_7 = _mm_unpacklo_epi16(in[9], in[7]);                   \
+    const __m128i hi_9_7 = _mm_unpackhi_epi16(in[9], in[7]);                   \
+    const __m128i lo_5_11 = _mm_unpacklo_epi16(in[5], in[11]);                 \
+    const __m128i hi_5_11 = _mm_unpackhi_epi16(in[5], in[11]);                 \
+    const __m128i lo_13_3 = _mm_unpacklo_epi16(in[13], in[3]);                 \
+    const __m128i hi_13_3 = _mm_unpackhi_epi16(in[13], in[3]);                 \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_9_7, hi_9_7, stg2_0, stg2_1,   \
+                           stg2_2, stg2_3, stp2_8, stp2_15, stp2_9, stp2_14)   \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_5_11, hi_5_11, lo_13_3, hi_13_3, stg2_4, stg2_5, \
+                           stg2_6, stg2_7, stp2_10, stp2_13, stp2_11, stp2_12) \
+  }                                                                            \
+                                                                               \
+  /* Stage3 */                                                                 \
+  {                                                                            \
+    const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], in[14]);                 \
+    const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], in[14]);                 \
+    const __m128i lo_10_6 = _mm_unpacklo_epi16(in[10], in[6]);                 \
+    const __m128i hi_10_6 = _mm_unpackhi_epi16(in[10], in[6]);                 \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_2_14, hi_2_14, lo_10_6, hi_10_6, stg3_0, stg3_1, \
+                           stg3_2, stg3_3, stp1_4, stp1_7, stp1_5, stp1_6)     \
+                                                                               \
+    stp1_8_0 = _mm_add_epi16(stp2_8, stp2_9);                                  \
+    stp1_9 = _mm_sub_epi16(stp2_8, stp2_9);                                    \
+    stp1_10 = _mm_sub_epi16(stp2_11, stp2_10);                                 \
+    stp1_11 = _mm_add_epi16(stp2_11, stp2_10);                                 \
+                                                                               \
+    stp1_12_0 = _mm_add_epi16(stp2_12, stp2_13);                               \
+    stp1_13 = _mm_sub_epi16(stp2_12, stp2_13);                                 \
+    stp1_14 = _mm_sub_epi16(stp2_15, stp2_14);                                 \
+    stp1_15 = _mm_add_epi16(stp2_15, stp2_14);                                 \
+  }                                                                            \
+                                                                               \
+  /* Stage4 */                                                                 \
+  {                                                                            \
+    const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]);                   \
+    const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], in[8]);                   \
+    const __m128i lo_4_12 = _mm_unpacklo_epi16(in[4], in[12]);                 \
+    const __m128i hi_4_12 = _mm_unpackhi_epi16(in[4], in[12]);                 \
+                                                                               \
+    const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14);               \
+    const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14);               \
+    const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);             \
+    const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13);             \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_0_8, hi_0_8, lo_4_12, hi_4_12, stg4_0, stg4_1,   \
+                           stg4_2, stg4_3, stp2_0, stp2_1, stp2_2, stp2_3)     \
+                                                                               \
+    stp2_4 = _mm_add_epi16(stp1_4, stp1_5);                                    \
+    stp2_5 = _mm_sub_epi16(stp1_4, stp1_5);                                    \
+    stp2_6 = _mm_sub_epi16(stp1_7, stp1_6);                                    \
+    stp2_7 = _mm_add_epi16(stp1_7, stp1_6);                                    \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4,       \
+                           stg4_5, stg4_6, stg4_7, stp2_9, stp2_14, stp2_10,   \
+                           stp2_13)                                            \
+  }                                                                            \
+                                                                               \
+  /* Stage5 */                                                                 \
+  {                                                                            \
+    const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5);                 \
+    const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5);                 \
+                                                                               \
+    stp1_0 = _mm_add_epi16(stp2_0, stp2_3);                                    \
+    stp1_1 = _mm_add_epi16(stp2_1, stp2_2);                                    \
+    stp1_2 = _mm_sub_epi16(stp2_1, stp2_2);                                    \
+    stp1_3 = _mm_sub_epi16(stp2_0, stp2_3);                                    \
+                                                                               \
+    tmp0 = _mm_madd_epi16(lo_6_5, stg4_1);                                     \
+    tmp1 = _mm_madd_epi16(hi_6_5, stg4_1);                                     \
+    tmp2 = _mm_madd_epi16(lo_6_5, stg4_0);                                     \
+    tmp3 = _mm_madd_epi16(hi_6_5, stg4_0);                                     \
+                                                                               \
+    tmp0 = _mm_add_epi32(tmp0, rounding);                                      \
+    tmp1 = _mm_add_epi32(tmp1, rounding);                                      \
+    tmp2 = _mm_add_epi32(tmp2, rounding);                                      \
+    tmp3 = _mm_add_epi32(tmp3, rounding);                                      \
+                                                                               \
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);                               \
+    tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);                               \
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);                               \
+    tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);                               \
+                                                                               \
+    stp1_5 = _mm_packs_epi32(tmp0, tmp1);                                      \
+    stp1_6 = _mm_packs_epi32(tmp2, tmp3);                                      \
+                                                                               \
+    stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11);                                 \
+    stp1_9 = _mm_add_epi16(stp2_9, stp2_10);                                   \
+    stp1_10 = _mm_sub_epi16(stp2_9, stp2_10);                                  \
+    stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11);                                \
+                                                                               \
+    stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0);                               \
+    stp1_13 = _mm_sub_epi16(stp2_14, stp2_13);                                 \
+    stp1_14 = _mm_add_epi16(stp2_14, stp2_13);                                 \
+    stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0);                               \
+  }                                                                            \
+                                                                               \
+  /* Stage6 */                                                                 \
+  {                                                                            \
+    const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);             \
+    const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13);             \
+    const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12);             \
+    const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12);             \
+                                                                               \
+    stp2_0 = _mm_add_epi16(stp1_0, stp2_7);                                    \
+    stp2_1 = _mm_add_epi16(stp1_1, stp1_6);                                    \
+    stp2_2 = _mm_add_epi16(stp1_2, stp1_5);                                    \
+    stp2_3 = _mm_add_epi16(stp1_3, stp2_4);                                    \
+    stp2_4 = _mm_sub_epi16(stp1_3, stp2_4);                                    \
+    stp2_5 = _mm_sub_epi16(stp1_2, stp1_5);                                    \
+    stp2_6 = _mm_sub_epi16(stp1_1, stp1_6);                                    \
+    stp2_7 = _mm_sub_epi16(stp1_0, stp2_7);                                    \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0,     \
+                           stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11,  \
+                           stp2_12)                                            \
+  }
+
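+// IDCT16 variant for sparse blocks where only in[0..3] (the transposed
+// top-left 4x4 quadrant, presumably the eob <= 10 case) are non-zero: the
+// all-zero operands drop out and several butterflies collapse into copies.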
+#define IDCT16_10                                                              \
+  /* Stage2 */                                                                 \
+  {                                                                            \
+    const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero);                   \
+    const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], zero);                   \
+    const __m128i lo_13_3 = _mm_unpacklo_epi16(zero, in[3]);                   \
+    const __m128i hi_13_3 = _mm_unpackhi_epi16(zero, in[3]);                   \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_13_3, hi_13_3, stg2_0, stg2_1, \
+                           stg2_6, stg2_7, stp1_8_0, stp1_15, stp1_11,         \
+                           stp1_12_0)                                          \
+  }                                                                            \
+                                                                               \
+  /* Stage3 */                                                                 \
+  {                                                                            \
+    const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], zero);                   \
+    const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], zero);                   \
+                                                                               \
+    MULTIPLICATION_AND_ADD_2(lo_2_14, hi_2_14, stg3_0, stg3_1, stp2_4, stp2_7) \
+                                                                               \
+    stp1_9 = stp1_8_0;                                                         \
+    stp1_10 = stp1_11;                                                         \
+                                                                               \
+    stp1_13 = stp1_12_0;                                                       \
+    stp1_14 = stp1_15;                                                         \
+  }                                                                            \
+                                                                               \
+  /* Stage4 */                                                                 \
+  {                                                                            \
+    const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero);                    \
+    const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], zero);                    \
+                                                                               \
+    const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14);               \
+    const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14);               \
+    const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);             \
+    const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13);             \
+                                                                               \
+    MULTIPLICATION_AND_ADD_2(lo_0_8, hi_0_8, stg4_0, stg4_1, stp1_0, stp1_1)   \
+    stp2_5 = stp2_4;                                                           \
+    stp2_6 = stp2_7;                                                           \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4,       \
+                           stg4_5, stg4_6, stg4_7, stp2_9, stp2_14, stp2_10,   \
+                           stp2_13)                                            \
+  }                                                                            \
+                                                                               \
+  /* Stage5 */                                                                 \
+  {                                                                            \
+    const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5);                 \
+    const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5);                 \
+                                                                               \
+    stp1_2 = stp1_1;                                                           \
+    stp1_3 = stp1_0;                                                           \
+                                                                               \
+    tmp0 = _mm_madd_epi16(lo_6_5, stg4_1);                                     \
+    tmp1 = _mm_madd_epi16(hi_6_5, stg4_1);                                     \
+    tmp2 = _mm_madd_epi16(lo_6_5, stg4_0);                                     \
+    tmp3 = _mm_madd_epi16(hi_6_5, stg4_0);                                     \
+                                                                               \
+    tmp0 = _mm_add_epi32(tmp0, rounding);                                      \
+    tmp1 = _mm_add_epi32(tmp1, rounding);                                      \
+    tmp2 = _mm_add_epi32(tmp2, rounding);                                      \
+    tmp3 = _mm_add_epi32(tmp3, rounding);                                      \
+                                                                               \
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);                               \
+    tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);                               \
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);                               \
+    tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);                               \
+                                                                               \
+    stp1_5 = _mm_packs_epi32(tmp0, tmp1);                                      \
+    stp1_6 = _mm_packs_epi32(tmp2, tmp3);                                      \
+                                                                               \
+    stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11);                                 \
+    stp1_9 = _mm_add_epi16(stp2_9, stp2_10);                                   \
+    stp1_10 = _mm_sub_epi16(stp2_9, stp2_10);                                  \
+    stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11);                                \
+                                                                               \
+    stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0);                               \
+    stp1_13 = _mm_sub_epi16(stp2_14, stp2_13);                                 \
+    stp1_14 = _mm_add_epi16(stp2_14, stp2_13);                                 \
+    stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0);                               \
+  }                                                                            \
+                                                                               \
+  /* Stage6 */                                                                 \
+  {                                                                            \
+    const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);             \
+    const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13);             \
+    const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12);             \
+    const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12);             \
+                                                                               \
+    stp2_0 = _mm_add_epi16(stp1_0, stp2_7);                                    \
+    stp2_1 = _mm_add_epi16(stp1_1, stp1_6);                                    \
+    stp2_2 = _mm_add_epi16(stp1_2, stp1_5);                                    \
+    stp2_3 = _mm_add_epi16(stp1_3, stp2_4);                                    \
+    stp2_4 = _mm_sub_epi16(stp1_3, stp2_4);                                    \
+    stp2_5 = _mm_sub_epi16(stp1_2, stp1_5);                                    \
+    stp2_6 = _mm_sub_epi16(stp1_1, stp1_6);                                    \
+    stp2_7 = _mm_sub_epi16(stp1_0, stp2_7);                                    \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0,     \
+                           stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11,  \
+                           stp2_12)                                            \
+  }
+
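+// Full 16x16 inverse DCT in four 8-wide strips: the first loop runs the row
+// pass over the top and bottom eight rows (results into l[] and r[]); the
+// second loop runs the column pass over the left and right eight columns and
+// reconstructs eight output columns per iteration. The interleaved loads put
+// each row's left half in in[0..7] and right half in in[8..15] so the two
+// 8x8 halves can be transposed in place. Final rounding is (x + 32) >> 6.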
+void vp10_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
+                                 int stride) {
+  const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  const __m128i final_rounding = _mm_set1_epi16(1 << 5);
+  const __m128i zero = _mm_setzero_si128();
+
+  const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+  const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
+  const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+  const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64);
+  const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+  const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64);
+  const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+  const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
+
+  const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+  const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+  const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+  const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64);
+
+  const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+  const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+  const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+  const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+  const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+  const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+  const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+
+  const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+  __m128i in[16], l[16], r[16], *curr1;
+  __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7,
+      stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
+      stp1_8_0, stp1_12_0;
+  __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+      stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15;
+  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int i;
+
+  curr1 = l;
+  for (i = 0; i < 2; i++) {
+    // 1-D vp10_idct
+
+    // Load input data.
+    in[0] = _mm_load_si128((const __m128i *)input);
+    in[8] = _mm_load_si128((const __m128i *)(input + 8 * 1));
+    in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2));
+    in[9] = _mm_load_si128((const __m128i *)(input + 8 * 3));
+    in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4));
+    in[10] = _mm_load_si128((const __m128i *)(input + 8 * 5));
+    in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6));
+    in[11] = _mm_load_si128((const __m128i *)(input + 8 * 7));
+    in[4] = _mm_load_si128((const __m128i *)(input + 8 * 8));
+    in[12] = _mm_load_si128((const __m128i *)(input + 8 * 9));
+    in[5] = _mm_load_si128((const __m128i *)(input + 8 * 10));
+    in[13] = _mm_load_si128((const __m128i *)(input + 8 * 11));
+    in[6] = _mm_load_si128((const __m128i *)(input + 8 * 12));
+    in[14] = _mm_load_si128((const __m128i *)(input + 8 * 13));
+    in[7] = _mm_load_si128((const __m128i *)(input + 8 * 14));
+    in[15] = _mm_load_si128((const __m128i *)(input + 8 * 15));
+
+    array_transpose_8x8(in, in);
+    array_transpose_8x8(in + 8, in + 8);
+
+    IDCT16
+
+    // Stage7
+    curr1[0] = _mm_add_epi16(stp2_0, stp1_15);
+    curr1[1] = _mm_add_epi16(stp2_1, stp1_14);
+    curr1[2] = _mm_add_epi16(stp2_2, stp2_13);
+    curr1[3] = _mm_add_epi16(stp2_3, stp2_12);
+    curr1[4] = _mm_add_epi16(stp2_4, stp2_11);
+    curr1[5] = _mm_add_epi16(stp2_5, stp2_10);
+    curr1[6] = _mm_add_epi16(stp2_6, stp1_9);
+    curr1[7] = _mm_add_epi16(stp2_7, stp1_8);
+    curr1[8] = _mm_sub_epi16(stp2_7, stp1_8);
+    curr1[9] = _mm_sub_epi16(stp2_6, stp1_9);
+    curr1[10] = _mm_sub_epi16(stp2_5, stp2_10);
+    curr1[11] = _mm_sub_epi16(stp2_4, stp2_11);
+    curr1[12] = _mm_sub_epi16(stp2_3, stp2_12);
+    curr1[13] = _mm_sub_epi16(stp2_2, stp2_13);
+    curr1[14] = _mm_sub_epi16(stp2_1, stp1_14);
+    curr1[15] = _mm_sub_epi16(stp2_0, stp1_15);
+
+    curr1 = r;
+    input += 128;
+  }
+  for (i = 0; i < 2; i++) {
+    int j;
+    // 1-D vp10_idct
+    array_transpose_8x8(l + i * 8, in);
+    array_transpose_8x8(r + i * 8, in + 8);
+
+    IDCT16
+
+    // 2-D
+    in[0] = _mm_add_epi16(stp2_0, stp1_15);
+    in[1] = _mm_add_epi16(stp2_1, stp1_14);
+    in[2] = _mm_add_epi16(stp2_2, stp2_13);
+    in[3] = _mm_add_epi16(stp2_3, stp2_12);
+    in[4] = _mm_add_epi16(stp2_4, stp2_11);
+    in[5] = _mm_add_epi16(stp2_5, stp2_10);
+    in[6] = _mm_add_epi16(stp2_6, stp1_9);
+    in[7] = _mm_add_epi16(stp2_7, stp1_8);
+    in[8] = _mm_sub_epi16(stp2_7, stp1_8);
+    in[9] = _mm_sub_epi16(stp2_6, stp1_9);
+    in[10] = _mm_sub_epi16(stp2_5, stp2_10);
+    in[11] = _mm_sub_epi16(stp2_4, stp2_11);
+    in[12] = _mm_sub_epi16(stp2_3, stp2_12);
+    in[13] = _mm_sub_epi16(stp2_2, stp2_13);
+    in[14] = _mm_sub_epi16(stp2_1, stp1_14);
+    in[15] = _mm_sub_epi16(stp2_0, stp1_15);
+
+    for (j = 0; j < 16; ++j) {
+      // Final rounding and shift
+      in[j] = _mm_adds_epi16(in[j], final_rounding);
+      in[j] = _mm_srai_epi16(in[j], 6);
+      RECON_AND_STORE(dest + j * stride, in[j]);
+    }
+
+    dest += 8;
+  }
+}
+
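+// DC-only 16x16 shortcut, analogous to vp10_idct8x8_1_add_sse2 but with the
+// 16x16 rounding shift of 6; the block is written as two 8-pixel-wide column
+// halves (dest += 8 between iterations).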
+void vp10_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest,
+                               int stride) {
+  __m128i dc_value;
+  const __m128i zero = _mm_setzero_si128();
+  int a, i;
+
+  a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+  a = (int)dct_const_round_shift(a * cospi_16_64);
+  a = ROUND_POWER_OF_TWO(a, 6);
+
+  dc_value = _mm_set1_epi16(a);
+
+  for (i = 0; i < 2; ++i) {
+    RECON_AND_STORE(dest + 0 * stride, dc_value);
+    RECON_AND_STORE(dest + 1 * stride, dc_value);
+    RECON_AND_STORE(dest + 2 * stride, dc_value);
+    RECON_AND_STORE(dest + 3 * stride, dc_value);
+    RECON_AND_STORE(dest + 4 * stride, dc_value);
+    RECON_AND_STORE(dest + 5 * stride, dc_value);
+    RECON_AND_STORE(dest + 6 * stride, dc_value);
+    RECON_AND_STORE(dest + 7 * stride, dc_value);
+    RECON_AND_STORE(dest + 8 * stride, dc_value);
+    RECON_AND_STORE(dest + 9 * stride, dc_value);
+    RECON_AND_STORE(dest + 10 * stride, dc_value);
+    RECON_AND_STORE(dest + 11 * stride, dc_value);
+    RECON_AND_STORE(dest + 12 * stride, dc_value);
+    RECON_AND_STORE(dest + 13 * stride, dc_value);
+    RECON_AND_STORE(dest + 14 * stride, dc_value);
+    RECON_AND_STORE(dest + 15 * stride, dc_value);
+    dest += 8;
+  }
+}
+
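+// 16-point inverse ADST over eight columns (half of a 16x16 hybrid
+// transform). Same fixed-point pattern as above: unpack 16-bit pairs,
+// _mm_madd_epi16 against a pair_set_epi16 constant, add DCT_CONST_ROUNDING,
+// shift right by DCT_CONST_BITS, and pack back to 16 bits after each stage.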
+static void vp10_iadst16_8col(__m128i *in) {
+  // perform 16x16 1-D ADST for 8 columns
+  __m128i s[16], x[16], u[32], v[32];
+  const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
+  const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64);
+  const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64);
+  const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64);
+  const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64);
+  const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64);
+  const __m128i k__cospi_p13_p19 = pair_set_epi16(cospi_13_64, cospi_19_64);
+  const __m128i k__cospi_p19_m13 = pair_set_epi16(cospi_19_64, -cospi_13_64);
+  const __m128i k__cospi_p17_p15 = pair_set_epi16(cospi_17_64, cospi_15_64);
+  const __m128i k__cospi_p15_m17 = pair_set_epi16(cospi_15_64, -cospi_17_64);
+  const __m128i k__cospi_p21_p11 = pair_set_epi16(cospi_21_64, cospi_11_64);
+  const __m128i k__cospi_p11_m21 = pair_set_epi16(cospi_11_64, -cospi_21_64);
+  const __m128i k__cospi_p25_p07 = pair_set_epi16(cospi_25_64, cospi_7_64);
+  const __m128i k__cospi_p07_m25 = pair_set_epi16(cospi_7_64, -cospi_25_64);
+  const __m128i k__cospi_p29_p03 = pair_set_epi16(cospi_29_64, cospi_3_64);
+  const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64);
+  const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64);
+  const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+  const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
+  const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+  const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64);
+  const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64);
+  const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+  const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
+  const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  const __m128i kZero = _mm_set1_epi16(0);
+
+  u[0] = _mm_unpacklo_epi16(in[15], in[0]);
+  u[1] = _mm_unpackhi_epi16(in[15], in[0]);
+  u[2] = _mm_unpacklo_epi16(in[13], in[2]);
+  u[3] = _mm_unpackhi_epi16(in[13], in[2]);
+  u[4] = _mm_unpacklo_epi16(in[11], in[4]);
+  u[5] = _mm_unpackhi_epi16(in[11], in[4]);
+  u[6] = _mm_unpacklo_epi16(in[9], in[6]);
+  u[7] = _mm_unpackhi_epi16(in[9], in[6]);
+  u[8] = _mm_unpacklo_epi16(in[7], in[8]);
+  u[9] = _mm_unpackhi_epi16(in[7], in[8]);
+  u[10] = _mm_unpacklo_epi16(in[5], in[10]);
+  u[11] = _mm_unpackhi_epi16(in[5], in[10]);
+  u[12] = _mm_unpacklo_epi16(in[3], in[12]);
+  u[13] = _mm_unpackhi_epi16(in[3], in[12]);
+  u[14] = _mm_unpacklo_epi16(in[1], in[14]);
+  u[15] = _mm_unpackhi_epi16(in[1], in[14]);
+
+  v[0] = _mm_madd_epi16(u[0], k__cospi_p01_p31);
+  v[1] = _mm_madd_epi16(u[1], k__cospi_p01_p31);
+  v[2] = _mm_madd_epi16(u[0], k__cospi_p31_m01);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p31_m01);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_p05_p27);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_p05_p27);
+  v[6] = _mm_madd_epi16(u[2], k__cospi_p27_m05);
+  v[7] = _mm_madd_epi16(u[3], k__cospi_p27_m05);
+  v[8] = _mm_madd_epi16(u[4], k__cospi_p09_p23);
+  v[9] = _mm_madd_epi16(u[5], k__cospi_p09_p23);
+  v[10] = _mm_madd_epi16(u[4], k__cospi_p23_m09);
+  v[11] = _mm_madd_epi16(u[5], k__cospi_p23_m09);
+  v[12] = _mm_madd_epi16(u[6], k__cospi_p13_p19);
+  v[13] = _mm_madd_epi16(u[7], k__cospi_p13_p19);
+  v[14] = _mm_madd_epi16(u[6], k__cospi_p19_m13);
+  v[15] = _mm_madd_epi16(u[7], k__cospi_p19_m13);
+  v[16] = _mm_madd_epi16(u[8], k__cospi_p17_p15);
+  v[17] = _mm_madd_epi16(u[9], k__cospi_p17_p15);
+  v[18] = _mm_madd_epi16(u[8], k__cospi_p15_m17);
+  v[19] = _mm_madd_epi16(u[9], k__cospi_p15_m17);
+  v[20] = _mm_madd_epi16(u[10], k__cospi_p21_p11);
+  v[21] = _mm_madd_epi16(u[11], k__cospi_p21_p11);
+  v[22] = _mm_madd_epi16(u[10], k__cospi_p11_m21);
+  v[23] = _mm_madd_epi16(u[11], k__cospi_p11_m21);
+  v[24] = _mm_madd_epi16(u[12], k__cospi_p25_p07);
+  v[25] = _mm_madd_epi16(u[13], k__cospi_p25_p07);
+  v[26] = _mm_madd_epi16(u[12], k__cospi_p07_m25);
+  v[27] = _mm_madd_epi16(u[13], k__cospi_p07_m25);
+  v[28] = _mm_madd_epi16(u[14], k__cospi_p29_p03);
+  v[29] = _mm_madd_epi16(u[15], k__cospi_p29_p03);
+  v[30] = _mm_madd_epi16(u[14], k__cospi_p03_m29);
+  v[31] = _mm_madd_epi16(u[15], k__cospi_p03_m29);
+
+  u[0] = _mm_add_epi32(v[0], v[16]);
+  u[1] = _mm_add_epi32(v[1], v[17]);
+  u[2] = _mm_add_epi32(v[2], v[18]);
+  u[3] = _mm_add_epi32(v[3], v[19]);
+  u[4] = _mm_add_epi32(v[4], v[20]);
+  u[5] = _mm_add_epi32(v[5], v[21]);
+  u[6] = _mm_add_epi32(v[6], v[22]);
+  u[7] = _mm_add_epi32(v[7], v[23]);
+  u[8] = _mm_add_epi32(v[8], v[24]);
+  u[9] = _mm_add_epi32(v[9], v[25]);
+  u[10] = _mm_add_epi32(v[10], v[26]);
+  u[11] = _mm_add_epi32(v[11], v[27]);
+  u[12] = _mm_add_epi32(v[12], v[28]);
+  u[13] = _mm_add_epi32(v[13], v[29]);
+  u[14] = _mm_add_epi32(v[14], v[30]);
+  u[15] = _mm_add_epi32(v[15], v[31]);
+  u[16] = _mm_sub_epi32(v[0], v[16]);
+  u[17] = _mm_sub_epi32(v[1], v[17]);
+  u[18] = _mm_sub_epi32(v[2], v[18]);
+  u[19] = _mm_sub_epi32(v[3], v[19]);
+  u[20] = _mm_sub_epi32(v[4], v[20]);
+  u[21] = _mm_sub_epi32(v[5], v[21]);
+  u[22] = _mm_sub_epi32(v[6], v[22]);
+  u[23] = _mm_sub_epi32(v[7], v[23]);
+  u[24] = _mm_sub_epi32(v[8], v[24]);
+  u[25] = _mm_sub_epi32(v[9], v[25]);
+  u[26] = _mm_sub_epi32(v[10], v[26]);
+  u[27] = _mm_sub_epi32(v[11], v[27]);
+  u[28] = _mm_sub_epi32(v[12], v[28]);
+  u[29] = _mm_sub_epi32(v[13], v[29]);
+  u[30] = _mm_sub_epi32(v[14], v[30]);
+  u[31] = _mm_sub_epi32(v[15], v[31]);
+
+  v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+  v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+  v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+  v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+  v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+  v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+  v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+  v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+  v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+  v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+  v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+  v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+  v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+  v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+  v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+  v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+  v[16] = _mm_add_epi32(u[16], k__DCT_CONST_ROUNDING);
+  v[17] = _mm_add_epi32(u[17], k__DCT_CONST_ROUNDING);
+  v[18] = _mm_add_epi32(u[18], k__DCT_CONST_ROUNDING);
+  v[19] = _mm_add_epi32(u[19], k__DCT_CONST_ROUNDING);
+  v[20] = _mm_add_epi32(u[20], k__DCT_CONST_ROUNDING);
+  v[21] = _mm_add_epi32(u[21], k__DCT_CONST_ROUNDING);
+  v[22] = _mm_add_epi32(u[22], k__DCT_CONST_ROUNDING);
+  v[23] = _mm_add_epi32(u[23], k__DCT_CONST_ROUNDING);
+  v[24] = _mm_add_epi32(u[24], k__DCT_CONST_ROUNDING);
+  v[25] = _mm_add_epi32(u[25], k__DCT_CONST_ROUNDING);
+  v[26] = _mm_add_epi32(u[26], k__DCT_CONST_ROUNDING);
+  v[27] = _mm_add_epi32(u[27], k__DCT_CONST_ROUNDING);
+  v[28] = _mm_add_epi32(u[28], k__DCT_CONST_ROUNDING);
+  v[29] = _mm_add_epi32(u[29], k__DCT_CONST_ROUNDING);
+  v[30] = _mm_add_epi32(u[30], k__DCT_CONST_ROUNDING);
+  v[31] = _mm_add_epi32(u[31], k__DCT_CONST_ROUNDING);
+
+  u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+  u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+  u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+  u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+  u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+  u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+  u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+  u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+  u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+  u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+  u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+  u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+  u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+  u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+  u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+  u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+  u[16] = _mm_srai_epi32(v[16], DCT_CONST_BITS);
+  u[17] = _mm_srai_epi32(v[17], DCT_CONST_BITS);
+  u[18] = _mm_srai_epi32(v[18], DCT_CONST_BITS);
+  u[19] = _mm_srai_epi32(v[19], DCT_CONST_BITS);
+  u[20] = _mm_srai_epi32(v[20], DCT_CONST_BITS);
+  u[21] = _mm_srai_epi32(v[21], DCT_CONST_BITS);
+  u[22] = _mm_srai_epi32(v[22], DCT_CONST_BITS);
+  u[23] = _mm_srai_epi32(v[23], DCT_CONST_BITS);
+  u[24] = _mm_srai_epi32(v[24], DCT_CONST_BITS);
+  u[25] = _mm_srai_epi32(v[25], DCT_CONST_BITS);
+  u[26] = _mm_srai_epi32(v[26], DCT_CONST_BITS);
+  u[27] = _mm_srai_epi32(v[27], DCT_CONST_BITS);
+  u[28] = _mm_srai_epi32(v[28], DCT_CONST_BITS);
+  u[29] = _mm_srai_epi32(v[29], DCT_CONST_BITS);
+  u[30] = _mm_srai_epi32(v[30], DCT_CONST_BITS);
+  u[31] = _mm_srai_epi32(v[31], DCT_CONST_BITS);
+
+  s[0] = _mm_packs_epi32(u[0], u[1]);
+  s[1] = _mm_packs_epi32(u[2], u[3]);
+  s[2] = _mm_packs_epi32(u[4], u[5]);
+  s[3] = _mm_packs_epi32(u[6], u[7]);
+  s[4] = _mm_packs_epi32(u[8], u[9]);
+  s[5] = _mm_packs_epi32(u[10], u[11]);
+  s[6] = _mm_packs_epi32(u[12], u[13]);
+  s[7] = _mm_packs_epi32(u[14], u[15]);
+  s[8] = _mm_packs_epi32(u[16], u[17]);
+  s[9] = _mm_packs_epi32(u[18], u[19]);
+  s[10] = _mm_packs_epi32(u[20], u[21]);
+  s[11] = _mm_packs_epi32(u[22], u[23]);
+  s[12] = _mm_packs_epi32(u[24], u[25]);
+  s[13] = _mm_packs_epi32(u[26], u[27]);
+  s[14] = _mm_packs_epi32(u[28], u[29]);
+  s[15] = _mm_packs_epi32(u[30], u[31]);
+
+  // stage 2
+  u[0] = _mm_unpacklo_epi16(s[8], s[9]);
+  u[1] = _mm_unpackhi_epi16(s[8], s[9]);
+  u[2] = _mm_unpacklo_epi16(s[10], s[11]);
+  u[3] = _mm_unpackhi_epi16(s[10], s[11]);
+  u[4] = _mm_unpacklo_epi16(s[12], s[13]);
+  u[5] = _mm_unpackhi_epi16(s[12], s[13]);
+  u[6] = _mm_unpacklo_epi16(s[14], s[15]);
+  u[7] = _mm_unpackhi_epi16(s[14], s[15]);
+
+  v[0] = _mm_madd_epi16(u[0], k__cospi_p04_p28);
+  v[1] = _mm_madd_epi16(u[1], k__cospi_p04_p28);
+  v[2] = _mm_madd_epi16(u[0], k__cospi_p28_m04);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p28_m04);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_p20_p12);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_p20_p12);
+  v[6] = _mm_madd_epi16(u[2], k__cospi_p12_m20);
+  v[7] = _mm_madd_epi16(u[3], k__cospi_p12_m20);
+  v[8] = _mm_madd_epi16(u[4], k__cospi_m28_p04);
+  v[9] = _mm_madd_epi16(u[5], k__cospi_m28_p04);
+  v[10] = _mm_madd_epi16(u[4], k__cospi_p04_p28);
+  v[11] = _mm_madd_epi16(u[5], k__cospi_p04_p28);
+  v[12] = _mm_madd_epi16(u[6], k__cospi_m12_p20);
+  v[13] = _mm_madd_epi16(u[7], k__cospi_m12_p20);
+  v[14] = _mm_madd_epi16(u[6], k__cospi_p20_p12);
+  v[15] = _mm_madd_epi16(u[7], k__cospi_p20_p12);
+
+  u[0] = _mm_add_epi32(v[0], v[8]);
+  u[1] = _mm_add_epi32(v[1], v[9]);
+  u[2] = _mm_add_epi32(v[2], v[10]);
+  u[3] = _mm_add_epi32(v[3], v[11]);
+  u[4] = _mm_add_epi32(v[4], v[12]);
+  u[5] = _mm_add_epi32(v[5], v[13]);
+  u[6] = _mm_add_epi32(v[6], v[14]);
+  u[7] = _mm_add_epi32(v[7], v[15]);
+  u[8] = _mm_sub_epi32(v[0], v[8]);
+  u[9] = _mm_sub_epi32(v[1], v[9]);
+  u[10] = _mm_sub_epi32(v[2], v[10]);
+  u[11] = _mm_sub_epi32(v[3], v[11]);
+  u[12] = _mm_sub_epi32(v[4], v[12]);
+  u[13] = _mm_sub_epi32(v[5], v[13]);
+  u[14] = _mm_sub_epi32(v[6], v[14]);
+  u[15] = _mm_sub_epi32(v[7], v[15]);
+
+  v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+  v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+  v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+  v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+  v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+  v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+  v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+  v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+  v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+  v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+  v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+  v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+  v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+  v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+  v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+  v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+  u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+  u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+  u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+  u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+  u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+  u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+  u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+  u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+  u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+  u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+  u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+  u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+  u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+  u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+  u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+  u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+
+  x[0] = _mm_add_epi16(s[0], s[4]);
+  x[1] = _mm_add_epi16(s[1], s[5]);
+  x[2] = _mm_add_epi16(s[2], s[6]);
+  x[3] = _mm_add_epi16(s[3], s[7]);
+  x[4] = _mm_sub_epi16(s[0], s[4]);
+  x[5] = _mm_sub_epi16(s[1], s[5]);
+  x[6] = _mm_sub_epi16(s[2], s[6]);
+  x[7] = _mm_sub_epi16(s[3], s[7]);
+  x[8] = _mm_packs_epi32(u[0], u[1]);
+  x[9] = _mm_packs_epi32(u[2], u[3]);
+  x[10] = _mm_packs_epi32(u[4], u[5]);
+  x[11] = _mm_packs_epi32(u[6], u[7]);
+  x[12] = _mm_packs_epi32(u[8], u[9]);
+  x[13] = _mm_packs_epi32(u[10], u[11]);
+  x[14] = _mm_packs_epi32(u[12], u[13]);
+  x[15] = _mm_packs_epi32(u[14], u[15]);
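+  // (the top half of this stage is plain 16-bit adds/subs on s[0..7]; only
+  // the rotated half needs the widened 32-bit multiply/round/pack path above)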
+
+  // stage 3
+  u[0] = _mm_unpacklo_epi16(x[4], x[5]);
+  u[1] = _mm_unpackhi_epi16(x[4], x[5]);
+  u[2] = _mm_unpacklo_epi16(x[6], x[7]);
+  u[3] = _mm_unpackhi_epi16(x[6], x[7]);
+  u[4] = _mm_unpacklo_epi16(x[12], x[13]);
+  u[5] = _mm_unpackhi_epi16(x[12], x[13]);
+  u[6] = _mm_unpacklo_epi16(x[14], x[15]);
+  u[7] = _mm_unpackhi_epi16(x[14], x[15]);
+
+  v[0] = _mm_madd_epi16(u[0], k__cospi_p08_p24);
+  v[1] = _mm_madd_epi16(u[1], k__cospi_p08_p24);
+  v[2] = _mm_madd_epi16(u[0], k__cospi_p24_m08);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p24_m08);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_m24_p08);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_m24_p08);
+  v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24);
+  v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24);
+  v[8] = _mm_madd_epi16(u[4], k__cospi_p08_p24);
+  v[9] = _mm_madd_epi16(u[5], k__cospi_p08_p24);
+  v[10] = _mm_madd_epi16(u[4], k__cospi_p24_m08);
+  v[11] = _mm_madd_epi16(u[5], k__cospi_p24_m08);
+  v[12] = _mm_madd_epi16(u[6], k__cospi_m24_p08);
+  v[13] = _mm_madd_epi16(u[7], k__cospi_m24_p08);
+  v[14] = _mm_madd_epi16(u[6], k__cospi_p08_p24);
+  v[15] = _mm_madd_epi16(u[7], k__cospi_p08_p24);
+
+  u[0] = _mm_add_epi32(v[0], v[4]);
+  u[1] = _mm_add_epi32(v[1], v[5]);
+  u[2] = _mm_add_epi32(v[2], v[6]);
+  u[3] = _mm_add_epi32(v[3], v[7]);
+  u[4] = _mm_sub_epi32(v[0], v[4]);
+  u[5] = _mm_sub_epi32(v[1], v[5]);
+  u[6] = _mm_sub_epi32(v[2], v[6]);
+  u[7] = _mm_sub_epi32(v[3], v[7]);
+  u[8] = _mm_add_epi32(v[8], v[12]);
+  u[9] = _mm_add_epi32(v[9], v[13]);
+  u[10] = _mm_add_epi32(v[10], v[14]);
+  u[11] = _mm_add_epi32(v[11], v[15]);
+  u[12] = _mm_sub_epi32(v[8], v[12]);
+  u[13] = _mm_sub_epi32(v[9], v[13]);
+  u[14] = _mm_sub_epi32(v[10], v[14]);
+  u[15] = _mm_sub_epi32(v[11], v[15]);
+
+  u[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+  u[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+  u[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+  u[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+  u[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+  u[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+  u[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+  u[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+  u[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+  u[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+  u[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+  u[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+  u[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+  u[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+  u[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+  u[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+  v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+  v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+  v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+  v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+  v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+  v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+  v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+  v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+  v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+  v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+  v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+  v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+  v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+  v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+  v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+  v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+
+  s[0] = _mm_add_epi16(x[0], x[2]);
+  s[1] = _mm_add_epi16(x[1], x[3]);
+  s[2] = _mm_sub_epi16(x[0], x[2]);
+  s[3] = _mm_sub_epi16(x[1], x[3]);
+  s[4] = _mm_packs_epi32(v[0], v[1]);
+  s[5] = _mm_packs_epi32(v[2], v[3]);
+  s[6] = _mm_packs_epi32(v[4], v[5]);
+  s[7] = _mm_packs_epi32(v[6], v[7]);
+  s[8] = _mm_add_epi16(x[8], x[10]);
+  s[9] = _mm_add_epi16(x[9], x[11]);
+  s[10] = _mm_sub_epi16(x[8], x[10]);
+  s[11] = _mm_sub_epi16(x[9], x[11]);
+  s[12] = _mm_packs_epi32(v[8], v[9]);
+  s[13] = _mm_packs_epi32(v[10], v[11]);
+  s[14] = _mm_packs_epi32(v[12], v[13]);
+  s[15] = _mm_packs_epi32(v[14], v[15]);
+
+  // stage 4
+  u[0] = _mm_unpacklo_epi16(s[2], s[3]);
+  u[1] = _mm_unpackhi_epi16(s[2], s[3]);
+  u[2] = _mm_unpacklo_epi16(s[6], s[7]);
+  u[3] = _mm_unpackhi_epi16(s[6], s[7]);
+  u[4] = _mm_unpacklo_epi16(s[10], s[11]);
+  u[5] = _mm_unpackhi_epi16(s[10], s[11]);
+  u[6] = _mm_unpacklo_epi16(s[14], s[15]);
+  u[7] = _mm_unpackhi_epi16(s[14], s[15]);
+
+  v[0] = _mm_madd_epi16(u[0], k__cospi_m16_m16);
+  v[1] = _mm_madd_epi16(u[1], k__cospi_m16_m16);
+  v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_p16_p16);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_p16_p16);
+  v[6] = _mm_madd_epi16(u[2], k__cospi_m16_p16);
+  v[7] = _mm_madd_epi16(u[3], k__cospi_m16_p16);
+  v[8] = _mm_madd_epi16(u[4], k__cospi_p16_p16);
+  v[9] = _mm_madd_epi16(u[5], k__cospi_p16_p16);
+  v[10] = _mm_madd_epi16(u[4], k__cospi_m16_p16);
+  v[11] = _mm_madd_epi16(u[5], k__cospi_m16_p16);
+  v[12] = _mm_madd_epi16(u[6], k__cospi_m16_m16);
+  v[13] = _mm_madd_epi16(u[7], k__cospi_m16_m16);
+  v[14] = _mm_madd_epi16(u[6], k__cospi_p16_m16);
+  v[15] = _mm_madd_epi16(u[7], k__cospi_p16_m16);
+
+  u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+  u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+  u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+  u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+  u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+  u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+  u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+  u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+  u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+  u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+  u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+  u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+  u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+  u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+  u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+  u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+  v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+  v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+  v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+  v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+  v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+  v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+  v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+  v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+  v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+  v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+  v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+  v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+  v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+  v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+  v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+  v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+
+  in[0] = s[0];
+  in[1] = _mm_sub_epi16(kZero, s[8]);
+  in[2] = s[12];
+  in[3] = _mm_sub_epi16(kZero, s[4]);
+  in[4] = _mm_packs_epi32(v[4], v[5]);
+  in[5] = _mm_packs_epi32(v[12], v[13]);
+  in[6] = _mm_packs_epi32(v[8], v[9]);
+  in[7] = _mm_packs_epi32(v[0], v[1]);
+  in[8] = _mm_packs_epi32(v[2], v[3]);
+  in[9] = _mm_packs_epi32(v[10], v[11]);
+  in[10] = _mm_packs_epi32(v[14], v[15]);
+  in[11] = _mm_packs_epi32(v[6], v[7]);
+  in[12] = s[5];
+  in[13] = _mm_sub_epi16(kZero, s[13]);
+  in[14] = s[9];
+  in[15] = _mm_sub_epi16(kZero, s[1]);
+}
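+
+// Illustrative sketch only: every rounding pair above (_mm_add_epi32 with
+// k__DCT_CONST_ROUNDING followed by _mm_srai_epi32 by DCT_CONST_BITS) is the
+// vector form of this scalar step, assuming the usual fixed-point scheme
+// (DCT_CONST_BITS == 14, DCT_CONST_ROUNDING == 1 << 13; cf.
+// dct_const_round_shift() in txfm_common.h). The helper name is ours, not
+// part of the library.
+static INLINE int round_shift_sketch(int x) {
+  return (x + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;  // round to nearest
+}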
+
+static void vp10_idct16_8col(__m128i *in) {
+  const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+  const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
+  const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+  const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64);
+  const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+  const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
+  const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+  const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64);
+  const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+  const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64);
+  const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+  const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
+  const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+  const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+  const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+  const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+  const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+  const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  __m128i v[16], u[16], s[16], t[16];
+
+  // stage 1
+  s[0] = in[0];
+  s[1] = in[8];
+  s[2] = in[4];
+  s[3] = in[12];
+  s[4] = in[2];
+  s[5] = in[10];
+  s[6] = in[6];
+  s[7] = in[14];
+  s[8] = in[1];
+  s[9] = in[9];
+  s[10] = in[5];
+  s[11] = in[13];
+  s[12] = in[3];
+  s[13] = in[11];
+  s[14] = in[7];
+  s[15] = in[15];
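+  // (stage 1 just reorders the input rows into the interleaved,
+  // bit-reversed order that the butterfly stages below expect)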
+
+  // stage 2
+  u[0] = _mm_unpacklo_epi16(s[8], s[15]);
+  u[1] = _mm_unpackhi_epi16(s[8], s[15]);
+  u[2] = _mm_unpacklo_epi16(s[9], s[14]);
+  u[3] = _mm_unpackhi_epi16(s[9], s[14]);
+  u[4] = _mm_unpacklo_epi16(s[10], s[13]);
+  u[5] = _mm_unpackhi_epi16(s[10], s[13]);
+  u[6] = _mm_unpacklo_epi16(s[11], s[12]);
+  u[7] = _mm_unpackhi_epi16(s[11], s[12]);
+
+  v[0] = _mm_madd_epi16(u[0], k__cospi_p30_m02);
+  v[1] = _mm_madd_epi16(u[1], k__cospi_p30_m02);
+  v[2] = _mm_madd_epi16(u[0], k__cospi_p02_p30);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p02_p30);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_p14_m18);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_p14_m18);
+  v[6] = _mm_madd_epi16(u[2], k__cospi_p18_p14);
+  v[7] = _mm_madd_epi16(u[3], k__cospi_p18_p14);
+  v[8] = _mm_madd_epi16(u[4], k__cospi_p22_m10);
+  v[9] = _mm_madd_epi16(u[5], k__cospi_p22_m10);
+  v[10] = _mm_madd_epi16(u[4], k__cospi_p10_p22);
+  v[11] = _mm_madd_epi16(u[5], k__cospi_p10_p22);
+  v[12] = _mm_madd_epi16(u[6], k__cospi_p06_m26);
+  v[13] = _mm_madd_epi16(u[7], k__cospi_p06_m26);
+  v[14] = _mm_madd_epi16(u[6], k__cospi_p26_p06);
+  v[15] = _mm_madd_epi16(u[7], k__cospi_p26_p06);
+
+  u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+  u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+  u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+  u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+  u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+  u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+  u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+  u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+  u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+  u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+  u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+  u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+  u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+  u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+  u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+  u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+  u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+  u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+  u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+  u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+  u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+  u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+  u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+  u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+  u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+  u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+  u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+  u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+  u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+  u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+  u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+  u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+
+  s[8] = _mm_packs_epi32(u[0], u[1]);
+  s[15] = _mm_packs_epi32(u[2], u[3]);
+  s[9] = _mm_packs_epi32(u[4], u[5]);
+  s[14] = _mm_packs_epi32(u[6], u[7]);
+  s[10] = _mm_packs_epi32(u[8], u[9]);
+  s[13] = _mm_packs_epi32(u[10], u[11]);
+  s[11] = _mm_packs_epi32(u[12], u[13]);
+  s[12] = _mm_packs_epi32(u[14], u[15]);
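+  // (each unpack above pairs the rotation partners (8,15), (9,14), (10,13),
+  // (11,12), so the rounded results pack back out in that order)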
+
+  // stage 3
+  t[0] = s[0];
+  t[1] = s[1];
+  t[2] = s[2];
+  t[3] = s[3];
+  u[0] = _mm_unpacklo_epi16(s[4], s[7]);
+  u[1] = _mm_unpackhi_epi16(s[4], s[7]);
+  u[2] = _mm_unpacklo_epi16(s[5], s[6]);
+  u[3] = _mm_unpackhi_epi16(s[5], s[6]);
+
+  v[0] = _mm_madd_epi16(u[0], k__cospi_p28_m04);
+  v[1] = _mm_madd_epi16(u[1], k__cospi_p28_m04);
+  v[2] = _mm_madd_epi16(u[0], k__cospi_p04_p28);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p04_p28);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_p12_m20);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_p12_m20);
+  v[6] = _mm_madd_epi16(u[2], k__cospi_p20_p12);
+  v[7] = _mm_madd_epi16(u[3], k__cospi_p20_p12);
+
+  u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+  u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+  u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+  u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+  u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+  u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+  u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+  u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+
+  u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+  u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+  u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+  u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+  u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+  u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+  u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+  u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+
+  t[4] = _mm_packs_epi32(u[0], u[1]);
+  t[7] = _mm_packs_epi32(u[2], u[3]);
+  t[5] = _mm_packs_epi32(u[4], u[5]);
+  t[6] = _mm_packs_epi32(u[6], u[7]);
+  t[8] = _mm_add_epi16(s[8], s[9]);
+  t[9] = _mm_sub_epi16(s[8], s[9]);
+  t[10] = _mm_sub_epi16(s[11], s[10]);
+  t[11] = _mm_add_epi16(s[10], s[11]);
+  t[12] = _mm_add_epi16(s[12], s[13]);
+  t[13] = _mm_sub_epi16(s[12], s[13]);
+  t[14] = _mm_sub_epi16(s[15], s[14]);
+  t[15] = _mm_add_epi16(s[14], s[15]);
+
+  // stage 4
+  u[0] = _mm_unpacklo_epi16(t[0], t[1]);
+  u[1] = _mm_unpackhi_epi16(t[0], t[1]);
+  u[2] = _mm_unpacklo_epi16(t[2], t[3]);
+  u[3] = _mm_unpackhi_epi16(t[2], t[3]);
+  u[4] = _mm_unpacklo_epi16(t[9], t[14]);
+  u[5] = _mm_unpackhi_epi16(t[9], t[14]);
+  u[6] = _mm_unpacklo_epi16(t[10], t[13]);
+  u[7] = _mm_unpackhi_epi16(t[10], t[13]);
+
+  v[0] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+  v[1] = _mm_madd_epi16(u[1], k__cospi_p16_p16);
+  v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_p24_m08);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_p24_m08);
+  v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24);
+  v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24);
+  v[8] = _mm_madd_epi16(u[4], k__cospi_m08_p24);
+  v[9] = _mm_madd_epi16(u[5], k__cospi_m08_p24);
+  v[10] = _mm_madd_epi16(u[4], k__cospi_p24_p08);
+  v[11] = _mm_madd_epi16(u[5], k__cospi_p24_p08);
+  v[12] = _mm_madd_epi16(u[6], k__cospi_m24_m08);
+  v[13] = _mm_madd_epi16(u[7], k__cospi_m24_m08);
+  v[14] = _mm_madd_epi16(u[6], k__cospi_m08_p24);
+  v[15] = _mm_madd_epi16(u[7], k__cospi_m08_p24);
+
+  u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+  u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+  u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+  u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+  u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+  u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+  u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+  u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+  u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+  u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+  u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+  u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+  u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+  u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+  u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+  u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+  u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+  u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+  u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+  u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+  u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+  u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+  u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+  u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+  u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+  u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+  u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+  u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+  u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+  u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+  u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+  u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+
+  s[0] = _mm_packs_epi32(u[0], u[1]);
+  s[1] = _mm_packs_epi32(u[2], u[3]);
+  s[2] = _mm_packs_epi32(u[4], u[5]);
+  s[3] = _mm_packs_epi32(u[6], u[7]);
+  s[4] = _mm_add_epi16(t[4], t[5]);
+  s[5] = _mm_sub_epi16(t[4], t[5]);
+  s[6] = _mm_sub_epi16(t[7], t[6]);
+  s[7] = _mm_add_epi16(t[6], t[7]);
+  s[8] = t[8];
+  s[15] = t[15];
+  s[9] = _mm_packs_epi32(u[8], u[9]);
+  s[14] = _mm_packs_epi32(u[10], u[11]);
+  s[10] = _mm_packs_epi32(u[12], u[13]);
+  s[13] = _mm_packs_epi32(u[14], u[15]);
+  s[11] = t[11];
+  s[12] = t[12];
+
+  // stage 5
+  t[0] = _mm_add_epi16(s[0], s[3]);
+  t[1] = _mm_add_epi16(s[1], s[2]);
+  t[2] = _mm_sub_epi16(s[1], s[2]);
+  t[3] = _mm_sub_epi16(s[0], s[3]);
+  t[4] = s[4];
+  t[7] = s[7];
+
+  u[0] = _mm_unpacklo_epi16(s[5], s[6]);
+  u[1] = _mm_unpackhi_epi16(s[5], s[6]);
+  v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16);
+  v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16);
+  v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16);
+  u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+  u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+  u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+  u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+  u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+  u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+  u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+  u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+  t[5] = _mm_packs_epi32(u[0], u[1]);
+  t[6] = _mm_packs_epi32(u[2], u[3]);
+
+  t[8] = _mm_add_epi16(s[8], s[11]);
+  t[9] = _mm_add_epi16(s[9], s[10]);
+  t[10] = _mm_sub_epi16(s[9], s[10]);
+  t[11] = _mm_sub_epi16(s[8], s[11]);
+  t[12] = _mm_sub_epi16(s[15], s[12]);
+  t[13] = _mm_sub_epi16(s[14], s[13]);
+  t[14] = _mm_add_epi16(s[13], s[14]);
+  t[15] = _mm_add_epi16(s[12], s[15]);
+
+  // stage 6
+  s[0] = _mm_add_epi16(t[0], t[7]);
+  s[1] = _mm_add_epi16(t[1], t[6]);
+  s[2] = _mm_add_epi16(t[2], t[5]);
+  s[3] = _mm_add_epi16(t[3], t[4]);
+  s[4] = _mm_sub_epi16(t[3], t[4]);
+  s[5] = _mm_sub_epi16(t[2], t[5]);
+  s[6] = _mm_sub_epi16(t[1], t[6]);
+  s[7] = _mm_sub_epi16(t[0], t[7]);
+  s[8] = t[8];
+  s[9] = t[9];
+
+  u[0] = _mm_unpacklo_epi16(t[10], t[13]);
+  u[1] = _mm_unpackhi_epi16(t[10], t[13]);
+  u[2] = _mm_unpacklo_epi16(t[11], t[12]);
+  u[3] = _mm_unpackhi_epi16(t[11], t[12]);
+
+  v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16);
+  v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16);
+  v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+  v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_m16_p16);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_m16_p16);
+  v[6] = _mm_madd_epi16(u[2], k__cospi_p16_p16);
+  v[7] = _mm_madd_epi16(u[3], k__cospi_p16_p16);
+
+  u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+  u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+  u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+  u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+  u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+  u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+  u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+  u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+
+  u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+  u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+  u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+  u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+  u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+  u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+  u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+  u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+
+  s[10] = _mm_packs_epi32(u[0], u[1]);
+  s[13] = _mm_packs_epi32(u[2], u[3]);
+  s[11] = _mm_packs_epi32(u[4], u[5]);
+  s[12] = _mm_packs_epi32(u[6], u[7]);
+  s[14] = t[14];
+  s[15] = t[15];
+
+  // stage 7
+  in[0] = _mm_add_epi16(s[0], s[15]);
+  in[1] = _mm_add_epi16(s[1], s[14]);
+  in[2] = _mm_add_epi16(s[2], s[13]);
+  in[3] = _mm_add_epi16(s[3], s[12]);
+  in[4] = _mm_add_epi16(s[4], s[11]);
+  in[5] = _mm_add_epi16(s[5], s[10]);
+  in[6] = _mm_add_epi16(s[6], s[9]);
+  in[7] = _mm_add_epi16(s[7], s[8]);
+  in[8] = _mm_sub_epi16(s[7], s[8]);
+  in[9] = _mm_sub_epi16(s[6], s[9]);
+  in[10] = _mm_sub_epi16(s[5], s[10]);
+  in[11] = _mm_sub_epi16(s[4], s[11]);
+  in[12] = _mm_sub_epi16(s[3], s[12]);
+  in[13] = _mm_sub_epi16(s[2], s[13]);
+  in[14] = _mm_sub_epi16(s[1], s[14]);
+  in[15] = _mm_sub_epi16(s[0], s[15]);
+}
+
+void vp10_idct16_sse2(__m128i *in0, __m128i *in1) {
+  array_transpose_16x16(in0, in1);
+  vp10_idct16_8col(in0);
+  vp10_idct16_8col(in1);
+}
+
+void vp10_iadst16_sse2(__m128i *in0, __m128i *in1) {
+  array_transpose_16x16(in0, in1);
+  vp10_iadst16_8col(in0);
+  vp10_iadst16_8col(in1);
+}
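+
+// Hypothetical usage sketch (the load/store helpers named here are
+// illustrative, not part of this file): in0 and in1 hold the left and right
+// eight columns of a 16x16 block, one __m128i row each. Each wrapper above
+// transposes and then runs the 1-D transform on both halves, so calling it
+// twice yields the full 2-D inverse transform:
+//
+//   __m128i col0[16], col1[16];
+//   load_coeffs_16x16(input, col0, col1);         // assumed loader
+//   vp10_idct16_sse2(col0, col1);                 // first (row) pass
+//   vp10_idct16_sse2(col0, col1);                 // second (column) pass
+//   round_store_16x16(dest, stride, col0, col1);  // assumed writer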
+
+void vp10_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
+                                int stride) {
+  const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  const __m128i final_rounding = _mm_set1_epi16(1 << 5);
+  const __m128i zero = _mm_setzero_si128();
+
+  const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+  const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
+  const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+  const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
+
+  const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+  const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+
+  const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+  const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+  const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+  const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+  const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+
+  const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+  __m128i in[16], l[16];
+  __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_8,
+      stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, stp1_8_0,
+      stp1_12_0;
+  __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+      stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14;
+  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int i;
+  // First 1-D inverse DCT.
+  // Load input data: in the _10 path at most the top-left 4x4 of the 16x16
+  // coefficient block can be nonzero, so only rows 0..3 (eight coefficients
+  // each) need to be loaded.
+  in[0] = _mm_load_si128((const __m128i *)input);
+  in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2));
+  in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4));
+  in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6));
+
+  TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1]);
+
+  // Stage2
+  {
+    const __m128i lo_1_15 = _mm_unpackhi_epi16(in[0], zero);
+    const __m128i lo_13_3 = _mm_unpackhi_epi16(zero, in[1]);
+
+    tmp0 = _mm_madd_epi16(lo_1_15, stg2_0);
+    tmp2 = _mm_madd_epi16(lo_1_15, stg2_1);
+    tmp5 = _mm_madd_epi16(lo_13_3, stg2_6);
+    tmp7 = _mm_madd_epi16(lo_13_3, stg2_7);
+
+    tmp0 = _mm_add_epi32(tmp0, rounding);
+    tmp2 = _mm_add_epi32(tmp2, rounding);
+    tmp5 = _mm_add_epi32(tmp5, rounding);
+    tmp7 = _mm_add_epi32(tmp7, rounding);
+
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+    tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS);
+    tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS);
+
+    stp2_8 = _mm_packs_epi32(tmp0, tmp2);
+    stp2_11 = _mm_packs_epi32(tmp5, tmp7);
+  }
+
+  // Stage3
+  {
+    const __m128i lo_2_14 = _mm_unpacklo_epi16(in[1], zero);
+
+    tmp0 = _mm_madd_epi16(lo_2_14, stg3_0);
+    tmp2 = _mm_madd_epi16(lo_2_14, stg3_1);
+
+    tmp0 = _mm_add_epi32(tmp0, rounding);
+    tmp2 = _mm_add_epi32(tmp2, rounding);
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+
+    stp1_13 = _mm_unpackhi_epi64(stp2_11, zero);
+    stp1_14 = _mm_unpackhi_epi64(stp2_8, zero);
+
+    stp1_4 = _mm_packs_epi32(tmp0, tmp2);
+  }
+
+  // Stage4
+  {
+    const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero);
+    const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp1_14);
+    const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp1_13);
+
+    tmp0 = _mm_madd_epi16(lo_0_8, stg4_0);
+    tmp2 = _mm_madd_epi16(lo_0_8, stg4_1);
+    tmp1 = _mm_madd_epi16(lo_9_14, stg4_4);
+    tmp3 = _mm_madd_epi16(lo_9_14, stg4_5);
+    tmp5 = _mm_madd_epi16(lo_10_13, stg4_6);
+    tmp7 = _mm_madd_epi16(lo_10_13, stg4_7);
+
+    tmp0 = _mm_add_epi32(tmp0, rounding);
+    tmp2 = _mm_add_epi32(tmp2, rounding);
+    tmp1 = _mm_add_epi32(tmp1, rounding);
+    tmp3 = _mm_add_epi32(tmp3, rounding);
+    tmp5 = _mm_add_epi32(tmp5, rounding);
+    tmp7 = _mm_add_epi32(tmp7, rounding);
+
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+    tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);
+    tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);
+    tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS);
+    tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS);
+
+    stp1_0 = _mm_packs_epi32(tmp0, tmp0);
+    stp1_1 = _mm_packs_epi32(tmp2, tmp2);
+    stp2_9 = _mm_packs_epi32(tmp1, tmp3);
+    stp2_10 = _mm_packs_epi32(tmp5, tmp7);
+
+    stp2_6 = _mm_unpackhi_epi64(stp1_4, zero);
+  }
+
+  // Stage5 and Stage6
+  {
+    tmp0 = _mm_add_epi16(stp2_8, stp2_11);
+    tmp1 = _mm_sub_epi16(stp2_8, stp2_11);
+    tmp2 = _mm_add_epi16(stp2_9, stp2_10);
+    tmp3 = _mm_sub_epi16(stp2_9, stp2_10);
+
+    stp1_9 = _mm_unpacklo_epi64(tmp2, zero);
+    stp1_10 = _mm_unpacklo_epi64(tmp3, zero);
+    stp1_8 = _mm_unpacklo_epi64(tmp0, zero);
+    stp1_11 = _mm_unpacklo_epi64(tmp1, zero);
+
+    stp1_13 = _mm_unpackhi_epi64(tmp3, zero);
+    stp1_14 = _mm_unpackhi_epi64(tmp2, zero);
+    stp1_12 = _mm_unpackhi_epi64(tmp1, zero);
+    stp1_15 = _mm_unpackhi_epi64(tmp0, zero);
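+    // (these _epi64 unpacks split registers that hold two packed 4-lane
+    // results back into separate low/high vectors)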
+  }
+
+  // Stage6
+  {
+    const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp1_4);
+    const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);
+    const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12);
+
+    tmp1 = _mm_madd_epi16(lo_6_5, stg4_1);
+    tmp3 = _mm_madd_epi16(lo_6_5, stg4_0);
+    tmp0 = _mm_madd_epi16(lo_10_13, stg6_0);
+    tmp2 = _mm_madd_epi16(lo_10_13, stg4_0);
+    tmp4 = _mm_madd_epi16(lo_11_12, stg6_0);
+    tmp6 = _mm_madd_epi16(lo_11_12, stg4_0);
+
+    tmp1 = _mm_add_epi32(tmp1, rounding);
+    tmp3 = _mm_add_epi32(tmp3, rounding);
+    tmp0 = _mm_add_epi32(tmp0, rounding);
+    tmp2 = _mm_add_epi32(tmp2, rounding);
+    tmp4 = _mm_add_epi32(tmp4, rounding);
+    tmp6 = _mm_add_epi32(tmp6, rounding);
+
+    tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);
+    tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+    tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+    tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+
+    stp1_6 = _mm_packs_epi32(tmp3, tmp1);
+
+    stp2_10 = _mm_packs_epi32(tmp0, zero);
+    stp2_13 = _mm_packs_epi32(tmp2, zero);
+    stp2_11 = _mm_packs_epi32(tmp4, zero);
+    stp2_12 = _mm_packs_epi32(tmp6, zero);
+
+    tmp0 = _mm_add_epi16(stp1_0, stp1_4);
+    tmp1 = _mm_sub_epi16(stp1_0, stp1_4);
+    tmp2 = _mm_add_epi16(stp1_1, stp1_6);
+    tmp3 = _mm_sub_epi16(stp1_1, stp1_6);
+
+    stp2_0 = _mm_unpackhi_epi64(tmp0, zero);
+    stp2_1 = _mm_unpacklo_epi64(tmp2, zero);
+    stp2_2 = _mm_unpackhi_epi64(tmp2, zero);
+    stp2_3 = _mm_unpacklo_epi64(tmp0, zero);
+    stp2_4 = _mm_unpacklo_epi64(tmp1, zero);
+    stp2_5 = _mm_unpackhi_epi64(tmp3, zero);
+    stp2_6 = _mm_unpacklo_epi64(tmp3, zero);
+    stp2_7 = _mm_unpackhi_epi64(tmp1, zero);
+  }
+
+  // Stage7. Left 8x16 only.
+  l[0] = _mm_add_epi16(stp2_0, stp1_15);
+  l[1] = _mm_add_epi16(stp2_1, stp1_14);
+  l[2] = _mm_add_epi16(stp2_2, stp2_13);
+  l[3] = _mm_add_epi16(stp2_3, stp2_12);
+  l[4] = _mm_add_epi16(stp2_4, stp2_11);
+  l[5] = _mm_add_epi16(stp2_5, stp2_10);
+  l[6] = _mm_add_epi16(stp2_6, stp1_9);
+  l[7] = _mm_add_epi16(stp2_7, stp1_8);
+  l[8] = _mm_sub_epi16(stp2_7, stp1_8);
+  l[9] = _mm_sub_epi16(stp2_6, stp1_9);
+  l[10] = _mm_sub_epi16(stp2_5, stp2_10);
+  l[11] = _mm_sub_epi16(stp2_4, stp2_11);
+  l[12] = _mm_sub_epi16(stp2_3, stp2_12);
+  l[13] = _mm_sub_epi16(stp2_2, stp2_13);
+  l[14] = _mm_sub_epi16(stp2_1, stp1_14);
+  l[15] = _mm_sub_epi16(stp2_0, stp1_15);
+
+  // Second 1-D inverse transform, performed per 8x16 block
+  for (i = 0; i < 2; i++) {
+    int j;
+    array_transpose_4X8(l + 8 * i, in);
+
+    IDCT16_10
+
+    // Stage7
+    in[0] = _mm_add_epi16(stp2_0, stp1_15);
+    in[1] = _mm_add_epi16(stp2_1, stp1_14);
+    in[2] = _mm_add_epi16(stp2_2, stp2_13);
+    in[3] = _mm_add_epi16(stp2_3, stp2_12);
+    in[4] = _mm_add_epi16(stp2_4, stp2_11);
+    in[5] = _mm_add_epi16(stp2_5, stp2_10);
+    in[6] = _mm_add_epi16(stp2_6, stp1_9);
+    in[7] = _mm_add_epi16(stp2_7, stp1_8);
+    in[8] = _mm_sub_epi16(stp2_7, stp1_8);
+    in[9] = _mm_sub_epi16(stp2_6, stp1_9);
+    in[10] = _mm_sub_epi16(stp2_5, stp2_10);
+    in[11] = _mm_sub_epi16(stp2_4, stp2_11);
+    in[12] = _mm_sub_epi16(stp2_3, stp2_12);
+    in[13] = _mm_sub_epi16(stp2_2, stp2_13);
+    in[14] = _mm_sub_epi16(stp2_1, stp1_14);
+    in[15] = _mm_sub_epi16(stp2_0, stp1_15);
+
+    for (j = 0; j < 16; ++j) {
+      // Final rounding and shift
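+      // i.e. ROUND_POWER_OF_TWO(in[j], 6) per 16-bit lane: saturating add of
+      // 1 << 5, then arithmetic shift right by 6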
+      in[j] = _mm_adds_epi16(in[j], final_rounding);
+      in[j] = _mm_srai_epi16(in[j], 6);
+      RECON_AND_STORE(dest + j * stride, in[j]);
+    }
+
+    dest += 8;
+  }
+}
+
+#define LOAD_DQCOEFF(reg, input)                  \
+  {                                               \
+    reg = _mm_load_si128((const __m128i *)input); \
+    input += 8;                                   \
+  }
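+
+// Usage sketch: LOAD_DQCOEFF reads eight 16-bit dequantized coefficients
+// into an XMM register and advances the source pointer, so a 16-wide row is
+// two consecutive invocations:
+//   LOAD_DQCOEFF(in[0], input);  // coefficients 0..7,  input += 8
+//   LOAD_DQCOEFF(in[1], input);  // coefficients 8..15, input += 8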
+
+#define IDCT32_34                                                              \
+  /* Stage1 */                                                                 \
+  {                                                                            \
+    const __m128i zero = _mm_setzero_si128();                                  \
+    const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero);                   \
+    const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero);                   \
+                                                                               \
+    const __m128i lo_25_7 = _mm_unpacklo_epi16(zero, in[7]);                   \
+    const __m128i hi_25_7 = _mm_unpackhi_epi16(zero, in[7]);                   \
+                                                                               \
+    const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], zero);                   \
+    const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], zero);                   \
+                                                                               \
+    const __m128i lo_29_3 = _mm_unpacklo_epi16(zero, in[3]);                   \
+    const __m128i hi_29_3 = _mm_unpackhi_epi16(zero, in[3]);                   \
+                                                                               \
+    MULTIPLICATION_AND_ADD_2(lo_1_31, hi_1_31, stg1_0, stg1_1, stp1_16,        \
+                             stp1_31);                                         \
+    MULTIPLICATION_AND_ADD_2(lo_25_7, hi_25_7, stg1_6, stg1_7, stp1_19,        \
+                             stp1_28);                                         \
+    MULTIPLICATION_AND_ADD_2(lo_5_27, hi_5_27, stg1_8, stg1_9, stp1_20,        \
+                             stp1_27);                                         \
+    MULTIPLICATION_AND_ADD_2(lo_29_3, hi_29_3, stg1_14, stg1_15, stp1_23,      \
+                             stp1_24);                                         \
+  }                                                                            \
+                                                                               \
+  /* Stage2 */                                                                 \
+  {                                                                            \
+    const __m128i zero = _mm_setzero_si128();                                  \
+    const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero);                   \
+    const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero);                   \
+                                                                               \
+    const __m128i lo_26_6 = _mm_unpacklo_epi16(zero, in[6]);                   \
+    const __m128i hi_26_6 = _mm_unpackhi_epi16(zero, in[6]);                   \
+                                                                               \
+    MULTIPLICATION_AND_ADD_2(lo_2_30, hi_2_30, stg2_0, stg2_1, stp2_8,         \
+                             stp2_15);                                         \
+    MULTIPLICATION_AND_ADD_2(lo_26_6, hi_26_6, stg2_6, stg2_7, stp2_11,        \
+                             stp2_12);                                         \
+                                                                               \
+    stp2_16 = stp1_16;                                                         \
+    stp2_19 = stp1_19;                                                         \
+                                                                               \
+    stp2_20 = stp1_20;                                                         \
+    stp2_23 = stp1_23;                                                         \
+                                                                               \
+    stp2_24 = stp1_24;                                                         \
+    stp2_27 = stp1_27;                                                         \
+                                                                               \
+    stp2_28 = stp1_28;                                                         \
+    stp2_31 = stp1_31;                                                         \
+  }                                                                            \
+                                                                               \
+  /* Stage3 */                                                                 \
+  {                                                                            \
+    const __m128i zero = _mm_setzero_si128();                                  \
+    const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero);                   \
+    const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero);                   \
+                                                                               \
+    const __m128i lo_17_30 = _mm_unpacklo_epi16(stp1_16, stp1_31);             \
+    const __m128i hi_17_30 = _mm_unpackhi_epi16(stp1_16, stp1_31);             \
+    const __m128i lo_18_29 = _mm_unpacklo_epi16(stp1_19, stp1_28);             \
+    const __m128i hi_18_29 = _mm_unpackhi_epi16(stp1_19, stp1_28);             \
+                                                                               \
+    const __m128i lo_21_26 = _mm_unpacklo_epi16(stp1_20, stp1_27);             \
+    const __m128i hi_21_26 = _mm_unpackhi_epi16(stp1_20, stp1_27);             \
+    const __m128i lo_22_25 = _mm_unpacklo_epi16(stp1_23, stp1_24);             \
+    const __m128i hi_22_25 = _mm_unpackhi_epi16(stp1_23, stp1_24);             \
+                                                                               \
+    MULTIPLICATION_AND_ADD_2(lo_4_28, hi_4_28, stg3_0, stg3_1, stp1_4,         \
+                             stp1_7);                                          \
+                                                                               \
+    stp1_8 = stp2_8;                                                           \
+    stp1_11 = stp2_11;                                                         \
+    stp1_12 = stp2_12;                                                         \
+    stp1_15 = stp2_15;                                                         \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4,     \
+                           stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, stp1_18,  \
+                           stp1_29)                                            \
+    MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8,     \
+                           stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, stp1_22, \
+                           stp1_25)                                            \
+                                                                               \
+    stp1_16 = stp2_16;                                                         \
+    stp1_31 = stp2_31;                                                         \
+    stp1_19 = stp2_19;                                                         \
+    stp1_20 = stp2_20;                                                         \
+    stp1_23 = stp2_23;                                                         \
+    stp1_24 = stp2_24;                                                         \
+    stp1_27 = stp2_27;                                                         \
+    stp1_28 = stp2_28;                                                         \
+  }                                                                            \
+                                                                               \
+  /* Stage4 */                                                                 \
+  {                                                                            \
+    const __m128i zero = _mm_setzero_si128();                                  \
+    const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero);                   \
+    const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero);                   \
+                                                                               \
+    const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp2_15);               \
+    const __m128i hi_9_14 = _mm_unpackhi_epi16(stp2_8, stp2_15);               \
+    const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp2_12);             \
+    const __m128i hi_10_13 = _mm_unpackhi_epi16(stp2_11, stp2_12);             \
+                                                                               \
+    MULTIPLICATION_AND_ADD_2(lo_0_16, hi_0_16, stg4_0, stg4_1, stp2_0,         \
+                             stp2_1);                                          \
+                                                                               \
+    stp2_4 = stp1_4;                                                           \
+    stp2_5 = stp1_4;                                                           \
+    stp2_6 = stp1_7;                                                           \
+    stp2_7 = stp1_7;                                                           \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4,       \
+                           stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, stp2_10,   \
+                           stp2_13)                                            \
+                                                                               \
+    stp2_8 = stp1_8;                                                           \
+    stp2_15 = stp1_15;                                                         \
+    stp2_11 = stp1_11;                                                         \
+    stp2_12 = stp1_12;                                                         \
+                                                                               \
+    stp2_16 = _mm_add_epi16(stp1_16, stp1_19);                                 \
+    stp2_17 = _mm_add_epi16(stp1_17, stp1_18);                                 \
+    stp2_18 = _mm_sub_epi16(stp1_17, stp1_18);                                 \
+    stp2_19 = _mm_sub_epi16(stp1_16, stp1_19);                                 \
+    stp2_20 = _mm_sub_epi16(stp1_23, stp1_20);                                 \
+    stp2_21 = _mm_sub_epi16(stp1_22, stp1_21);                                 \
+    stp2_22 = _mm_add_epi16(stp1_22, stp1_21);                                 \
+    stp2_23 = _mm_add_epi16(stp1_23, stp1_20);                                 \
+                                                                               \
+    stp2_24 = _mm_add_epi16(stp1_24, stp1_27);                                 \
+    stp2_25 = _mm_add_epi16(stp1_25, stp1_26);                                 \
+    stp2_26 = _mm_sub_epi16(stp1_25, stp1_26);                                 \
+    stp2_27 = _mm_sub_epi16(stp1_24, stp1_27);                                 \
+    stp2_28 = _mm_sub_epi16(stp1_31, stp1_28);                                 \
+    stp2_29 = _mm_sub_epi16(stp1_30, stp1_29);                                 \
+    stp2_30 = _mm_add_epi16(stp1_29, stp1_30);                                 \
+    stp2_31 = _mm_add_epi16(stp1_28, stp1_31);                                 \
+  }                                                                            \
+                                                                               \
+  /* Stage5 */                                                                 \
+  {                                                                            \
+    const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5);                 \
+    const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5);                 \
+    const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29);             \
+    const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29);             \
+                                                                               \
+    const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28);             \
+    const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28);             \
+    const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27);             \
+    const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27);             \
+                                                                               \
+    const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26);             \
+    const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26);             \
+                                                                               \
+    stp1_0 = stp2_0;                                                           \
+    stp1_1 = stp2_1;                                                           \
+    stp1_2 = stp2_1;                                                           \
+    stp1_3 = stp2_0;                                                           \
+                                                                               \
+    tmp0 = _mm_madd_epi16(lo_6_5, stg4_1);                                     \
+    tmp1 = _mm_madd_epi16(hi_6_5, stg4_1);                                     \
+    tmp2 = _mm_madd_epi16(lo_6_5, stg4_0);                                     \
+    tmp3 = _mm_madd_epi16(hi_6_5, stg4_0);                                     \
+                                                                               \
+    tmp0 = _mm_add_epi32(tmp0, rounding);                                      \
+    tmp1 = _mm_add_epi32(tmp1, rounding);                                      \
+    tmp2 = _mm_add_epi32(tmp2, rounding);                                      \
+    tmp3 = _mm_add_epi32(tmp3, rounding);                                      \
+                                                                               \
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);                               \
+    tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);                               \
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);                               \
+    tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);                               \
+                                                                               \
+    stp1_5 = _mm_packs_epi32(tmp0, tmp1);                                      \
+    stp1_6 = _mm_packs_epi32(tmp2, tmp3);                                      \
+                                                                               \
+    stp1_4 = stp2_4;                                                           \
+    stp1_7 = stp2_7;                                                           \
+                                                                               \
+    stp1_8 = _mm_add_epi16(stp2_8, stp2_11);                                   \
+    stp1_9 = _mm_add_epi16(stp2_9, stp2_10);                                   \
+    stp1_10 = _mm_sub_epi16(stp2_9, stp2_10);                                  \
+    stp1_11 = _mm_sub_epi16(stp2_8, stp2_11);                                  \
+    stp1_12 = _mm_sub_epi16(stp2_15, stp2_12);                                 \
+    stp1_13 = _mm_sub_epi16(stp2_14, stp2_13);                                 \
+    stp1_14 = _mm_add_epi16(stp2_14, stp2_13);                                 \
+    stp1_15 = _mm_add_epi16(stp2_15, stp2_12);                                 \
+                                                                               \
+    stp1_16 = stp2_16;                                                         \
+    stp1_17 = stp2_17;                                                         \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4,     \
+                           stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, stp1_19,  \
+                           stp1_28)                                            \
+    MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6,     \
+                           stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, stp1_21,  \
+                           stp1_26)                                            \
+                                                                               \
+    stp1_22 = stp2_22;                                                         \
+    stp1_23 = stp2_23;                                                         \
+    stp1_24 = stp2_24;                                                         \
+    stp1_25 = stp2_25;                                                         \
+    stp1_30 = stp2_30;                                                         \
+    stp1_31 = stp2_31;                                                         \
+  }                                                                            \
+                                                                               \
+  /* Stage6 */                                                                 \
+  {                                                                            \
+    const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);             \
+    const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13);             \
+    const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12);             \
+    const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12);             \
+                                                                               \
+    stp2_0 = _mm_add_epi16(stp1_0, stp1_7);                                    \
+    stp2_1 = _mm_add_epi16(stp1_1, stp1_6);                                    \
+    stp2_2 = _mm_add_epi16(stp1_2, stp1_5);                                    \
+    stp2_3 = _mm_add_epi16(stp1_3, stp1_4);                                    \
+    stp2_4 = _mm_sub_epi16(stp1_3, stp1_4);                                    \
+    stp2_5 = _mm_sub_epi16(stp1_2, stp1_5);                                    \
+    stp2_6 = _mm_sub_epi16(stp1_1, stp1_6);                                    \
+    stp2_7 = _mm_sub_epi16(stp1_0, stp1_7);                                    \
+                                                                               \
+    stp2_8 = stp1_8;                                                           \
+    stp2_9 = stp1_9;                                                           \
+    stp2_14 = stp1_14;                                                         \
+    stp2_15 = stp1_15;                                                         \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0,     \
+                           stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11,  \
+                           stp2_12)                                            \
+                                                                               \
+    stp2_16 = _mm_add_epi16(stp1_16, stp1_23);                                 \
+    stp2_17 = _mm_add_epi16(stp1_17, stp1_22);                                 \
+    stp2_18 = _mm_add_epi16(stp1_18, stp1_21);                                 \
+    stp2_19 = _mm_add_epi16(stp1_19, stp1_20);                                 \
+    stp2_20 = _mm_sub_epi16(stp1_19, stp1_20);                                 \
+    stp2_21 = _mm_sub_epi16(stp1_18, stp1_21);                                 \
+    stp2_22 = _mm_sub_epi16(stp1_17, stp1_22);                                 \
+    stp2_23 = _mm_sub_epi16(stp1_16, stp1_23);                                 \
+                                                                               \
+    stp2_24 = _mm_sub_epi16(stp1_31, stp1_24);                                 \
+    stp2_25 = _mm_sub_epi16(stp1_30, stp1_25);                                 \
+    stp2_26 = _mm_sub_epi16(stp1_29, stp1_26);                                 \
+    stp2_27 = _mm_sub_epi16(stp1_28, stp1_27);                                 \
+    stp2_28 = _mm_add_epi16(stp1_27, stp1_28);                                 \
+    stp2_29 = _mm_add_epi16(stp1_26, stp1_29);                                 \
+    stp2_30 = _mm_add_epi16(stp1_25, stp1_30);                                 \
+    stp2_31 = _mm_add_epi16(stp1_24, stp1_31);                                 \
+  }                                                                            \
+                                                                               \
+  /* Stage7 */                                                                 \
+  {                                                                            \
+    const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27);             \
+    const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27);             \
+    const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26);             \
+    const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26);             \
+                                                                               \
+    const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25);             \
+    const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25);             \
+    const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24);             \
+    const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24);             \
+                                                                               \
+    stp1_0 = _mm_add_epi16(stp2_0, stp2_15);                                   \
+    stp1_1 = _mm_add_epi16(stp2_1, stp2_14);                                   \
+    stp1_2 = _mm_add_epi16(stp2_2, stp2_13);                                   \
+    stp1_3 = _mm_add_epi16(stp2_3, stp2_12);                                   \
+    stp1_4 = _mm_add_epi16(stp2_4, stp2_11);                                   \
+    stp1_5 = _mm_add_epi16(stp2_5, stp2_10);                                   \
+    stp1_6 = _mm_add_epi16(stp2_6, stp2_9);                                    \
+    stp1_7 = _mm_add_epi16(stp2_7, stp2_8);                                    \
+    stp1_8 = _mm_sub_epi16(stp2_7, stp2_8);                                    \
+    stp1_9 = _mm_sub_epi16(stp2_6, stp2_9);                                    \
+    stp1_10 = _mm_sub_epi16(stp2_5, stp2_10);                                  \
+    stp1_11 = _mm_sub_epi16(stp2_4, stp2_11);                                  \
+    stp1_12 = _mm_sub_epi16(stp2_3, stp2_12);                                  \
+    stp1_13 = _mm_sub_epi16(stp2_2, stp2_13);                                  \
+    stp1_14 = _mm_sub_epi16(stp2_1, stp2_14);                                  \
+    stp1_15 = _mm_sub_epi16(stp2_0, stp2_15);                                  \
+                                                                               \
+    stp1_16 = stp2_16;                                                         \
+    stp1_17 = stp2_17;                                                         \
+    stp1_18 = stp2_18;                                                         \
+    stp1_19 = stp2_19;                                                         \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0,     \
+                           stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, stp1_21,  \
+                           stp1_26)                                            \
+    MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0,     \
+                           stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, stp1_23,  \
+                           stp1_24)                                            \
+                                                                               \
+    stp1_28 = stp2_28;                                                         \
+    stp1_29 = stp2_29;                                                         \
+    stp1_30 = stp2_30;                                                         \
+    stp1_31 = stp2_31;                                                         \
+  }
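+
+// Note: IDCT32_34 is the reduced 32-point inverse DCT used when at most the
+// first 34 zig-zag-ordered coefficients are nonzero (top-left 8x8 of the
+// block), so in[8..31] are implicitly zero; that is why stages 1 and 2
+// unpack each input row against a zero register instead of a second row.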
+
+#define IDCT32                                                                 \
+  /* Stage1 */                                                                 \
+  {                                                                            \
+    const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]);                 \
+    const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], in[31]);                 \
+    const __m128i lo_17_15 = _mm_unpacklo_epi16(in[17], in[15]);               \
+    const __m128i hi_17_15 = _mm_unpackhi_epi16(in[17], in[15]);               \
+                                                                               \
+    const __m128i lo_9_23 = _mm_unpacklo_epi16(in[9], in[23]);                 \
+    const __m128i hi_9_23 = _mm_unpackhi_epi16(in[9], in[23]);                 \
+    const __m128i lo_25_7 = _mm_unpacklo_epi16(in[25], in[7]);                 \
+    const __m128i hi_25_7 = _mm_unpackhi_epi16(in[25], in[7]);                 \
+                                                                               \
+    const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], in[27]);                 \
+    const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], in[27]);                 \
+    const __m128i lo_21_11 = _mm_unpacklo_epi16(in[21], in[11]);               \
+    const __m128i hi_21_11 = _mm_unpackhi_epi16(in[21], in[11]);               \
+                                                                               \
+    const __m128i lo_13_19 = _mm_unpacklo_epi16(in[13], in[19]);               \
+    const __m128i hi_13_19 = _mm_unpackhi_epi16(in[13], in[19]);               \
+    const __m128i lo_29_3 = _mm_unpacklo_epi16(in[29], in[3]);                 \
+    const __m128i hi_29_3 = _mm_unpackhi_epi16(in[29], in[3]);                 \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_1_31, hi_1_31, lo_17_15, hi_17_15, stg1_0,       \
+                           stg1_1, stg1_2, stg1_3, stp1_16, stp1_31, stp1_17,  \
+                           stp1_30)                                            \
+    MULTIPLICATION_AND_ADD(lo_9_23, hi_9_23, lo_25_7, hi_25_7, stg1_4, stg1_5, \
+                           stg1_6, stg1_7, stp1_18, stp1_29, stp1_19, stp1_28) \
+    MULTIPLICATION_AND_ADD(lo_5_27, hi_5_27, lo_21_11, hi_21_11, stg1_8,       \
+                           stg1_9, stg1_10, stg1_11, stp1_20, stp1_27,         \
+                           stp1_21, stp1_26)                                   \
+    MULTIPLICATION_AND_ADD(lo_13_19, hi_13_19, lo_29_3, hi_29_3, stg1_12,      \
+                           stg1_13, stg1_14, stg1_15, stp1_22, stp1_25,        \
+                           stp1_23, stp1_24)                                   \
+  }                                                                            \
+                                                                               \
+  /* Stage2 */                                                                 \
+  {                                                                            \
+    const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], in[30]);                 \
+    const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], in[30]);                 \
+    const __m128i lo_18_14 = _mm_unpacklo_epi16(in[18], in[14]);               \
+    const __m128i hi_18_14 = _mm_unpackhi_epi16(in[18], in[14]);               \
+                                                                               \
+    const __m128i lo_10_22 = _mm_unpacklo_epi16(in[10], in[22]);               \
+    const __m128i hi_10_22 = _mm_unpackhi_epi16(in[10], in[22]);               \
+    const __m128i lo_26_6 = _mm_unpacklo_epi16(in[26], in[6]);                 \
+    const __m128i hi_26_6 = _mm_unpackhi_epi16(in[26], in[6]);                 \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_2_30, hi_2_30, lo_18_14, hi_18_14, stg2_0,       \
+                           stg2_1, stg2_2, stg2_3, stp2_8, stp2_15, stp2_9,    \
+                           stp2_14)                                            \
+    MULTIPLICATION_AND_ADD(lo_10_22, hi_10_22, lo_26_6, hi_26_6, stg2_4,       \
+                           stg2_5, stg2_6, stg2_7, stp2_10, stp2_13, stp2_11,  \
+                           stp2_12)                                            \
+                                                                               \
+    stp2_16 = _mm_add_epi16(stp1_16, stp1_17);                                 \
+    stp2_17 = _mm_sub_epi16(stp1_16, stp1_17);                                 \
+    stp2_18 = _mm_sub_epi16(stp1_19, stp1_18);                                 \
+    stp2_19 = _mm_add_epi16(stp1_19, stp1_18);                                 \
+                                                                               \
+    stp2_20 = _mm_add_epi16(stp1_20, stp1_21);                                 \
+    stp2_21 = _mm_sub_epi16(stp1_20, stp1_21);                                 \
+    stp2_22 = _mm_sub_epi16(stp1_23, stp1_22);                                 \
+    stp2_23 = _mm_add_epi16(stp1_23, stp1_22);                                 \
+                                                                               \
+    stp2_24 = _mm_add_epi16(stp1_24, stp1_25);                                 \
+    stp2_25 = _mm_sub_epi16(stp1_24, stp1_25);                                 \
+    stp2_26 = _mm_sub_epi16(stp1_27, stp1_26);                                 \
+    stp2_27 = _mm_add_epi16(stp1_27, stp1_26);                                 \
+                                                                               \
+    stp2_28 = _mm_add_epi16(stp1_28, stp1_29);                                 \
+    stp2_29 = _mm_sub_epi16(stp1_28, stp1_29);                                 \
+    stp2_30 = _mm_sub_epi16(stp1_31, stp1_30);                                 \
+    stp2_31 = _mm_add_epi16(stp1_31, stp1_30);                                 \
+  }                                                                            \
+                                                                               \
+  /* Stage3 */                                                                 \
+  {                                                                            \
+    const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], in[28]);                 \
+    const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], in[28]);                 \
+    const __m128i lo_20_12 = _mm_unpacklo_epi16(in[20], in[12]);               \
+    const __m128i hi_20_12 = _mm_unpackhi_epi16(in[20], in[12]);               \
+                                                                               \
+    const __m128i lo_17_30 = _mm_unpacklo_epi16(stp2_17, stp2_30);             \
+    const __m128i hi_17_30 = _mm_unpackhi_epi16(stp2_17, stp2_30);             \
+    const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29);             \
+    const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29);             \
+                                                                               \
+    const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26);             \
+    const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26);             \
+    const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25);             \
+    const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25);             \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_4_28, hi_4_28, lo_20_12, hi_20_12, stg3_0,       \
+                           stg3_1, stg3_2, stg3_3, stp1_4, stp1_7, stp1_5,     \
+                           stp1_6)                                             \
+                                                                               \
+    stp1_8 = _mm_add_epi16(stp2_8, stp2_9);                                    \
+    stp1_9 = _mm_sub_epi16(stp2_8, stp2_9);                                    \
+    stp1_10 = _mm_sub_epi16(stp2_11, stp2_10);                                 \
+    stp1_11 = _mm_add_epi16(stp2_11, stp2_10);                                 \
+    stp1_12 = _mm_add_epi16(stp2_12, stp2_13);                                 \
+    stp1_13 = _mm_sub_epi16(stp2_12, stp2_13);                                 \
+    stp1_14 = _mm_sub_epi16(stp2_15, stp2_14);                                 \
+    stp1_15 = _mm_add_epi16(stp2_15, stp2_14);                                 \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4,     \
+                           stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, stp1_18,  \
+                           stp1_29)                                            \
+    MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8,     \
+                           stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, stp1_22, \
+                           stp1_25)                                            \
+                                                                               \
+    stp1_16 = stp2_16;                                                         \
+    stp1_31 = stp2_31;                                                         \
+    stp1_19 = stp2_19;                                                         \
+    stp1_20 = stp2_20;                                                         \
+    stp1_23 = stp2_23;                                                         \
+    stp1_24 = stp2_24;                                                         \
+    stp1_27 = stp2_27;                                                         \
+    stp1_28 = stp2_28;                                                         \
+  }                                                                            \
+                                                                               \
+  /* Stage4 */                                                                 \
+  {                                                                            \
+    const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], in[16]);                 \
+    const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], in[16]);                 \
+    const __m128i lo_8_24 = _mm_unpacklo_epi16(in[8], in[24]);                 \
+    const __m128i hi_8_24 = _mm_unpackhi_epi16(in[8], in[24]);                 \
+                                                                               \
+    const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14);               \
+    const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14);               \
+    const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);             \
+    const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13);             \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_0_16, hi_0_16, lo_8_24, hi_8_24, stg4_0, stg4_1, \
+                           stg4_2, stg4_3, stp2_0, stp2_1, stp2_2, stp2_3)     \
+                                                                               \
+    stp2_4 = _mm_add_epi16(stp1_4, stp1_5);                                    \
+    stp2_5 = _mm_sub_epi16(stp1_4, stp1_5);                                    \
+    stp2_6 = _mm_sub_epi16(stp1_7, stp1_6);                                    \
+    stp2_7 = _mm_add_epi16(stp1_7, stp1_6);                                    \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4,       \
+                           stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, stp2_10,   \
+                           stp2_13)                                            \
+                                                                               \
+    stp2_8 = stp1_8;                                                           \
+    stp2_15 = stp1_15;                                                         \
+    stp2_11 = stp1_11;                                                         \
+    stp2_12 = stp1_12;                                                         \
+                                                                               \
+    stp2_16 = _mm_add_epi16(stp1_16, stp1_19);                                 \
+    stp2_17 = _mm_add_epi16(stp1_17, stp1_18);                                 \
+    stp2_18 = _mm_sub_epi16(stp1_17, stp1_18);                                 \
+    stp2_19 = _mm_sub_epi16(stp1_16, stp1_19);                                 \
+    stp2_20 = _mm_sub_epi16(stp1_23, stp1_20);                                 \
+    stp2_21 = _mm_sub_epi16(stp1_22, stp1_21);                                 \
+    stp2_22 = _mm_add_epi16(stp1_22, stp1_21);                                 \
+    stp2_23 = _mm_add_epi16(stp1_23, stp1_20);                                 \
+                                                                               \
+    stp2_24 = _mm_add_epi16(stp1_24, stp1_27);                                 \
+    stp2_25 = _mm_add_epi16(stp1_25, stp1_26);                                 \
+    stp2_26 = _mm_sub_epi16(stp1_25, stp1_26);                                 \
+    stp2_27 = _mm_sub_epi16(stp1_24, stp1_27);                                 \
+    stp2_28 = _mm_sub_epi16(stp1_31, stp1_28);                                 \
+    stp2_29 = _mm_sub_epi16(stp1_30, stp1_29);                                 \
+    stp2_30 = _mm_add_epi16(stp1_29, stp1_30);                                 \
+    stp2_31 = _mm_add_epi16(stp1_28, stp1_31);                                 \
+  }                                                                            \
+                                                                               \
+  /* Stage5 */                                                                 \
+  {                                                                            \
+    const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5);                 \
+    const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5);                 \
+    const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29);             \
+    const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29);             \
+                                                                               \
+    const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28);             \
+    const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28);             \
+    const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27);             \
+    const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27);             \
+                                                                               \
+    const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26);             \
+    const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26);             \
+                                                                               \
+    stp1_0 = _mm_add_epi16(stp2_0, stp2_3);                                    \
+    stp1_1 = _mm_add_epi16(stp2_1, stp2_2);                                    \
+    stp1_2 = _mm_sub_epi16(stp2_1, stp2_2);                                    \
+    stp1_3 = _mm_sub_epi16(stp2_0, stp2_3);                                    \
+                                                                               \
+    tmp0 = _mm_madd_epi16(lo_6_5, stg4_1);                                     \
+    tmp1 = _mm_madd_epi16(hi_6_5, stg4_1);                                     \
+    tmp2 = _mm_madd_epi16(lo_6_5, stg4_0);                                     \
+    tmp3 = _mm_madd_epi16(hi_6_5, stg4_0);                                     \
+                                                                               \
+    tmp0 = _mm_add_epi32(tmp0, rounding);                                      \
+    tmp1 = _mm_add_epi32(tmp1, rounding);                                      \
+    tmp2 = _mm_add_epi32(tmp2, rounding);                                      \
+    tmp3 = _mm_add_epi32(tmp3, rounding);                                      \
+                                                                               \
+    tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);                               \
+    tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);                               \
+    tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);                               \
+    tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);                               \
+                                                                               \
+    stp1_5 = _mm_packs_epi32(tmp0, tmp1);                                      \
+    stp1_6 = _mm_packs_epi32(tmp2, tmp3);                                      \
+                                                                               \
+    stp1_4 = stp2_4;                                                           \
+    stp1_7 = stp2_7;                                                           \
+                                                                               \
+    stp1_8 = _mm_add_epi16(stp2_8, stp2_11);                                   \
+    stp1_9 = _mm_add_epi16(stp2_9, stp2_10);                                   \
+    stp1_10 = _mm_sub_epi16(stp2_9, stp2_10);                                  \
+    stp1_11 = _mm_sub_epi16(stp2_8, stp2_11);                                  \
+    stp1_12 = _mm_sub_epi16(stp2_15, stp2_12);                                 \
+    stp1_13 = _mm_sub_epi16(stp2_14, stp2_13);                                 \
+    stp1_14 = _mm_add_epi16(stp2_14, stp2_13);                                 \
+    stp1_15 = _mm_add_epi16(stp2_15, stp2_12);                                 \
+                                                                               \
+    stp1_16 = stp2_16;                                                         \
+    stp1_17 = stp2_17;                                                         \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4,     \
+                           stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, stp1_19,  \
+                           stp1_28)                                            \
+    MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6,     \
+                           stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, stp1_21,  \
+                           stp1_26)                                            \
+                                                                               \
+    stp1_22 = stp2_22;                                                         \
+    stp1_23 = stp2_23;                                                         \
+    stp1_24 = stp2_24;                                                         \
+    stp1_25 = stp2_25;                                                         \
+    stp1_30 = stp2_30;                                                         \
+    stp1_31 = stp2_31;                                                         \
+  }                                                                            \
+                                                                               \
+  /* Stage6 */                                                                 \
+  {                                                                            \
+    const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);             \
+    const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13);             \
+    const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12);             \
+    const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12);             \
+                                                                               \
+    stp2_0 = _mm_add_epi16(stp1_0, stp1_7);                                    \
+    stp2_1 = _mm_add_epi16(stp1_1, stp1_6);                                    \
+    stp2_2 = _mm_add_epi16(stp1_2, stp1_5);                                    \
+    stp2_3 = _mm_add_epi16(stp1_3, stp1_4);                                    \
+    stp2_4 = _mm_sub_epi16(stp1_3, stp1_4);                                    \
+    stp2_5 = _mm_sub_epi16(stp1_2, stp1_5);                                    \
+    stp2_6 = _mm_sub_epi16(stp1_1, stp1_6);                                    \
+    stp2_7 = _mm_sub_epi16(stp1_0, stp1_7);                                    \
+                                                                               \
+    stp2_8 = stp1_8;                                                           \
+    stp2_9 = stp1_9;                                                           \
+    stp2_14 = stp1_14;                                                         \
+    stp2_15 = stp1_15;                                                         \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0,     \
+                           stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11,  \
+                           stp2_12)                                            \
+                                                                               \
+    stp2_16 = _mm_add_epi16(stp1_16, stp1_23);                                 \
+    stp2_17 = _mm_add_epi16(stp1_17, stp1_22);                                 \
+    stp2_18 = _mm_add_epi16(stp1_18, stp1_21);                                 \
+    stp2_19 = _mm_add_epi16(stp1_19, stp1_20);                                 \
+    stp2_20 = _mm_sub_epi16(stp1_19, stp1_20);                                 \
+    stp2_21 = _mm_sub_epi16(stp1_18, stp1_21);                                 \
+    stp2_22 = _mm_sub_epi16(stp1_17, stp1_22);                                 \
+    stp2_23 = _mm_sub_epi16(stp1_16, stp1_23);                                 \
+                                                                               \
+    stp2_24 = _mm_sub_epi16(stp1_31, stp1_24);                                 \
+    stp2_25 = _mm_sub_epi16(stp1_30, stp1_25);                                 \
+    stp2_26 = _mm_sub_epi16(stp1_29, stp1_26);                                 \
+    stp2_27 = _mm_sub_epi16(stp1_28, stp1_27);                                 \
+    stp2_28 = _mm_add_epi16(stp1_27, stp1_28);                                 \
+    stp2_29 = _mm_add_epi16(stp1_26, stp1_29);                                 \
+    stp2_30 = _mm_add_epi16(stp1_25, stp1_30);                                 \
+    stp2_31 = _mm_add_epi16(stp1_24, stp1_31);                                 \
+  }                                                                            \
+                                                                               \
+  /* Stage7 */                                                                 \
+  {                                                                            \
+    const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27);             \
+    const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27);             \
+    const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26);             \
+    const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26);             \
+                                                                               \
+    const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25);             \
+    const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25);             \
+    const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24);             \
+    const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24);             \
+                                                                               \
+    stp1_0 = _mm_add_epi16(stp2_0, stp2_15);                                   \
+    stp1_1 = _mm_add_epi16(stp2_1, stp2_14);                                   \
+    stp1_2 = _mm_add_epi16(stp2_2, stp2_13);                                   \
+    stp1_3 = _mm_add_epi16(stp2_3, stp2_12);                                   \
+    stp1_4 = _mm_add_epi16(stp2_4, stp2_11);                                   \
+    stp1_5 = _mm_add_epi16(stp2_5, stp2_10);                                   \
+    stp1_6 = _mm_add_epi16(stp2_6, stp2_9);                                    \
+    stp1_7 = _mm_add_epi16(stp2_7, stp2_8);                                    \
+    stp1_8 = _mm_sub_epi16(stp2_7, stp2_8);                                    \
+    stp1_9 = _mm_sub_epi16(stp2_6, stp2_9);                                    \
+    stp1_10 = _mm_sub_epi16(stp2_5, stp2_10);                                  \
+    stp1_11 = _mm_sub_epi16(stp2_4, stp2_11);                                  \
+    stp1_12 = _mm_sub_epi16(stp2_3, stp2_12);                                  \
+    stp1_13 = _mm_sub_epi16(stp2_2, stp2_13);                                  \
+    stp1_14 = _mm_sub_epi16(stp2_1, stp2_14);                                  \
+    stp1_15 = _mm_sub_epi16(stp2_0, stp2_15);                                  \
+                                                                               \
+    stp1_16 = stp2_16;                                                         \
+    stp1_17 = stp2_17;                                                         \
+    stp1_18 = stp2_18;                                                         \
+    stp1_19 = stp2_19;                                                         \
+                                                                               \
+    MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0,     \
+                           stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, stp1_21,  \
+                           stp1_26)                                            \
+    MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0,     \
+                           stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, stp1_23,  \
+                           stp1_24)                                            \
+                                                                               \
+    stp1_28 = stp2_28;                                                         \
+    stp1_29 = stp2_29;                                                         \
+    stp1_30 = stp2_30;                                                         \
+    stp1_31 = stp2_31;                                                         \
+  }
+
+// Only the upper-left 8x8 block has non-zero coefficients (the eob <= 34
+// case).
+void vp10_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
+                                int stride) {
+  const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  const __m128i final_rounding = _mm_set1_epi16(1 << 5);
+
+  // vp10_idct constants for each stage
+  const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64);
+  const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64);
+  const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64);
+  const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64);
+  const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64);
+  const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64);
+  const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64);
+  const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64);
+
+  const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+  const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
+  const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+  const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
+
+  const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+  const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+  const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+  const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64);
+  const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64);
+  const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+  const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64);
+  const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64);
+
+  const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+  const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+  const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+  const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+
+  const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+  __m128i in[32], col[32];
+  __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7,
+      stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
+      stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, stp1_23,
+      stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, stp1_30, stp1_31;
+  __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+      stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15,
+      stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, stp2_23,
+      stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, stp2_30, stp2_31;
+  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int i;
+
+  // Load input data. Only the top-left 8x8 block needs to be loaded.
+  in[0] = _mm_load_si128((const __m128i *)input);
+  in[1] = _mm_load_si128((const __m128i *)(input + 32));
+  in[2] = _mm_load_si128((const __m128i *)(input + 64));
+  in[3] = _mm_load_si128((const __m128i *)(input + 96));
+  in[4] = _mm_load_si128((const __m128i *)(input + 128));
+  in[5] = _mm_load_si128((const __m128i *)(input + 160));
+  in[6] = _mm_load_si128((const __m128i *)(input + 192));
+  in[7] = _mm_load_si128((const __m128i *)(input + 224));
+
+  for (i = 8; i < 32; ++i) {
+    in[i] = _mm_setzero_si128();
+  }
+
+  array_transpose_8x8(in, in);
+  // TODO(hkuang): The following transposes are unnecessary, but removing
+  // them leads to a performance drop on some devices.
+  array_transpose_8x8(in + 8, in + 8);
+  array_transpose_8x8(in + 16, in + 16);
+  array_transpose_8x8(in + 24, in + 24);
+
+  IDCT32_34
+
+  // 1-D: Store 32 intermediate results for the 8x32 block.
+  col[0] = _mm_add_epi16(stp1_0, stp1_31);
+  col[1] = _mm_add_epi16(stp1_1, stp1_30);
+  col[2] = _mm_add_epi16(stp1_2, stp1_29);
+  col[3] = _mm_add_epi16(stp1_3, stp1_28);
+  col[4] = _mm_add_epi16(stp1_4, stp1_27);
+  col[5] = _mm_add_epi16(stp1_5, stp1_26);
+  col[6] = _mm_add_epi16(stp1_6, stp1_25);
+  col[7] = _mm_add_epi16(stp1_7, stp1_24);
+  col[8] = _mm_add_epi16(stp1_8, stp1_23);
+  col[9] = _mm_add_epi16(stp1_9, stp1_22);
+  col[10] = _mm_add_epi16(stp1_10, stp1_21);
+  col[11] = _mm_add_epi16(stp1_11, stp1_20);
+  col[12] = _mm_add_epi16(stp1_12, stp1_19);
+  col[13] = _mm_add_epi16(stp1_13, stp1_18);
+  col[14] = _mm_add_epi16(stp1_14, stp1_17);
+  col[15] = _mm_add_epi16(stp1_15, stp1_16);
+  col[16] = _mm_sub_epi16(stp1_15, stp1_16);
+  col[17] = _mm_sub_epi16(stp1_14, stp1_17);
+  col[18] = _mm_sub_epi16(stp1_13, stp1_18);
+  col[19] = _mm_sub_epi16(stp1_12, stp1_19);
+  col[20] = _mm_sub_epi16(stp1_11, stp1_20);
+  col[21] = _mm_sub_epi16(stp1_10, stp1_21);
+  col[22] = _mm_sub_epi16(stp1_9, stp1_22);
+  col[23] = _mm_sub_epi16(stp1_8, stp1_23);
+  col[24] = _mm_sub_epi16(stp1_7, stp1_24);
+  col[25] = _mm_sub_epi16(stp1_6, stp1_25);
+  col[26] = _mm_sub_epi16(stp1_5, stp1_26);
+  col[27] = _mm_sub_epi16(stp1_4, stp1_27);
+  col[28] = _mm_sub_epi16(stp1_3, stp1_28);
+  col[29] = _mm_sub_epi16(stp1_2, stp1_29);
+  col[30] = _mm_sub_epi16(stp1_1, stp1_30);
+  col[31] = _mm_sub_epi16(stp1_0, stp1_31);
+  for (i = 0; i < 4; i++) {
+    int j;
+    const __m128i zero = _mm_setzero_si128();
+    // Transpose 32x8 block to 8x32 block
+    array_transpose_8x8(col + i * 8, in);
+    IDCT32_34
+
+    // 2-D: Calculate the results and store them to the destination.
+    in[0] = _mm_add_epi16(stp1_0, stp1_31);
+    in[1] = _mm_add_epi16(stp1_1, stp1_30);
+    in[2] = _mm_add_epi16(stp1_2, stp1_29);
+    in[3] = _mm_add_epi16(stp1_3, stp1_28);
+    in[4] = _mm_add_epi16(stp1_4, stp1_27);
+    in[5] = _mm_add_epi16(stp1_5, stp1_26);
+    in[6] = _mm_add_epi16(stp1_6, stp1_25);
+    in[7] = _mm_add_epi16(stp1_7, stp1_24);
+    in[8] = _mm_add_epi16(stp1_8, stp1_23);
+    in[9] = _mm_add_epi16(stp1_9, stp1_22);
+    in[10] = _mm_add_epi16(stp1_10, stp1_21);
+    in[11] = _mm_add_epi16(stp1_11, stp1_20);
+    in[12] = _mm_add_epi16(stp1_12, stp1_19);
+    in[13] = _mm_add_epi16(stp1_13, stp1_18);
+    in[14] = _mm_add_epi16(stp1_14, stp1_17);
+    in[15] = _mm_add_epi16(stp1_15, stp1_16);
+    in[16] = _mm_sub_epi16(stp1_15, stp1_16);
+    in[17] = _mm_sub_epi16(stp1_14, stp1_17);
+    in[18] = _mm_sub_epi16(stp1_13, stp1_18);
+    in[19] = _mm_sub_epi16(stp1_12, stp1_19);
+    in[20] = _mm_sub_epi16(stp1_11, stp1_20);
+    in[21] = _mm_sub_epi16(stp1_10, stp1_21);
+    in[22] = _mm_sub_epi16(stp1_9, stp1_22);
+    in[23] = _mm_sub_epi16(stp1_8, stp1_23);
+    in[24] = _mm_sub_epi16(stp1_7, stp1_24);
+    in[25] = _mm_sub_epi16(stp1_6, stp1_25);
+    in[26] = _mm_sub_epi16(stp1_5, stp1_26);
+    in[27] = _mm_sub_epi16(stp1_4, stp1_27);
+    in[28] = _mm_sub_epi16(stp1_3, stp1_28);
+    in[29] = _mm_sub_epi16(stp1_2, stp1_29);
+    in[30] = _mm_sub_epi16(stp1_1, stp1_30);
+    in[31] = _mm_sub_epi16(stp1_0, stp1_31);
+
+    for (j = 0; j < 32; ++j) {
+      // Final rounding and shift
+      in[j] = _mm_adds_epi16(in[j], final_rounding);
+      in[j] = _mm_srai_epi16(in[j], 6);
+      RECON_AND_STORE(dest + j * stride, in[j]);
+    }
+
+    dest += 8;
+  }
+}
+
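+// General case: up to all 32x32 = 1024 coefficients may be non-zero.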
+void vp10_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
+                                  int stride) {
+  const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  const __m128i final_rounding = _mm_set1_epi16(1 << 5);
+  const __m128i zero = _mm_setzero_si128();
+
+  // vp10_idct constants for each stage
+  const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64);
+  const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64);
+  const __m128i stg1_2 = pair_set_epi16(cospi_15_64, -cospi_17_64);
+  const __m128i stg1_3 = pair_set_epi16(cospi_17_64, cospi_15_64);
+  const __m128i stg1_4 = pair_set_epi16(cospi_23_64, -cospi_9_64);
+  const __m128i stg1_5 = pair_set_epi16(cospi_9_64, cospi_23_64);
+  const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64);
+  const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64);
+  const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64);
+  const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64);
+  const __m128i stg1_10 = pair_set_epi16(cospi_11_64, -cospi_21_64);
+  const __m128i stg1_11 = pair_set_epi16(cospi_21_64, cospi_11_64);
+  const __m128i stg1_12 = pair_set_epi16(cospi_19_64, -cospi_13_64);
+  const __m128i stg1_13 = pair_set_epi16(cospi_13_64, cospi_19_64);
+  const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64);
+  const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64);
+
+  const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+  const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
+  const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+  const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64);
+  const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+  const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64);
+  const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+  const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
+
+  const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+  const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+  const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+  const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64);
+  const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+  const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64);
+  const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64);
+  const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+  const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64);
+  const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64);
+
+  const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+  const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+  const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+  const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+  const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+  const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+  const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+
+  const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+  __m128i in[32], col[128], zero_idx[16];
+  __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7,
+      stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
+      stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, stp1_23,
+      stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, stp1_30, stp1_31;
+  __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+      stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15,
+      stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, stp2_23,
+      stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, stp2_30, stp2_31;
+  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int i, j, i32;
+
+  for (i = 0; i < 4; i++) {
+    i32 = (i << 5);
+    // First 1-D vp10_idct
+    // Load input data.
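+    // The interleaved indices place row r, 8-wide chunk c at in[r + 8 * c],
+    // so the four 8x8 transposes below leave in[k] holding element k of all
+    // eight rows, ready for the 1-D row transform.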
+    LOAD_DQCOEFF(in[0], input);
+    LOAD_DQCOEFF(in[8], input);
+    LOAD_DQCOEFF(in[16], input);
+    LOAD_DQCOEFF(in[24], input);
+    LOAD_DQCOEFF(in[1], input);
+    LOAD_DQCOEFF(in[9], input);
+    LOAD_DQCOEFF(in[17], input);
+    LOAD_DQCOEFF(in[25], input);
+    LOAD_DQCOEFF(in[2], input);
+    LOAD_DQCOEFF(in[10], input);
+    LOAD_DQCOEFF(in[18], input);
+    LOAD_DQCOEFF(in[26], input);
+    LOAD_DQCOEFF(in[3], input);
+    LOAD_DQCOEFF(in[11], input);
+    LOAD_DQCOEFF(in[19], input);
+    LOAD_DQCOEFF(in[27], input);
+
+    LOAD_DQCOEFF(in[4], input);
+    LOAD_DQCOEFF(in[12], input);
+    LOAD_DQCOEFF(in[20], input);
+    LOAD_DQCOEFF(in[28], input);
+    LOAD_DQCOEFF(in[5], input);
+    LOAD_DQCOEFF(in[13], input);
+    LOAD_DQCOEFF(in[21], input);
+    LOAD_DQCOEFF(in[29], input);
+    LOAD_DQCOEFF(in[6], input);
+    LOAD_DQCOEFF(in[14], input);
+    LOAD_DQCOEFF(in[22], input);
+    LOAD_DQCOEFF(in[30], input);
+    LOAD_DQCOEFF(in[7], input);
+    LOAD_DQCOEFF(in[15], input);
+    LOAD_DQCOEFF(in[23], input);
+    LOAD_DQCOEFF(in[31], input);
+
+    // Check whether all 32 input vectors are zero by OR-reducing them.
+    zero_idx[0] = _mm_or_si128(in[0], in[1]);
+    zero_idx[1] = _mm_or_si128(in[2], in[3]);
+    zero_idx[2] = _mm_or_si128(in[4], in[5]);
+    zero_idx[3] = _mm_or_si128(in[6], in[7]);
+    zero_idx[4] = _mm_or_si128(in[8], in[9]);
+    zero_idx[5] = _mm_or_si128(in[10], in[11]);
+    zero_idx[6] = _mm_or_si128(in[12], in[13]);
+    zero_idx[7] = _mm_or_si128(in[14], in[15]);
+    zero_idx[8] = _mm_or_si128(in[16], in[17]);
+    zero_idx[9] = _mm_or_si128(in[18], in[19]);
+    zero_idx[10] = _mm_or_si128(in[20], in[21]);
+    zero_idx[11] = _mm_or_si128(in[22], in[23]);
+    zero_idx[12] = _mm_or_si128(in[24], in[25]);
+    zero_idx[13] = _mm_or_si128(in[26], in[27]);
+    zero_idx[14] = _mm_or_si128(in[28], in[29]);
+    zero_idx[15] = _mm_or_si128(in[30], in[31]);
+
+    zero_idx[0] = _mm_or_si128(zero_idx[0], zero_idx[1]);
+    zero_idx[1] = _mm_or_si128(zero_idx[2], zero_idx[3]);
+    zero_idx[2] = _mm_or_si128(zero_idx[4], zero_idx[5]);
+    zero_idx[3] = _mm_or_si128(zero_idx[6], zero_idx[7]);
+    zero_idx[4] = _mm_or_si128(zero_idx[8], zero_idx[9]);
+    zero_idx[5] = _mm_or_si128(zero_idx[10], zero_idx[11]);
+    zero_idx[6] = _mm_or_si128(zero_idx[12], zero_idx[13]);
+    zero_idx[7] = _mm_or_si128(zero_idx[14], zero_idx[15]);
+
+    zero_idx[8] = _mm_or_si128(zero_idx[0], zero_idx[1]);
+    zero_idx[9] = _mm_or_si128(zero_idx[2], zero_idx[3]);
+    zero_idx[10] = _mm_or_si128(zero_idx[4], zero_idx[5]);
+    zero_idx[11] = _mm_or_si128(zero_idx[6], zero_idx[7]);
+    zero_idx[12] = _mm_or_si128(zero_idx[8], zero_idx[9]);
+    zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]);
+    zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]);
+
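+    // If every byte of the reduction is zero, the whole 8x32 strip is zero:
+    // emit zero columns and skip the transform for this strip.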
+    if (_mm_movemask_epi8(_mm_cmpeq_epi32(zero_idx[14], zero)) == 0xFFFF) {
+      for (j = 0; j < 32; ++j) {
+        col[i32 + j] = _mm_setzero_si128();
+      }
+      continue;
+    }
+
+    // Transpose 32x8 block to 8x32 block
+    array_transpose_8x8(in, in);
+    array_transpose_8x8(in + 8, in + 8);
+    array_transpose_8x8(in + 16, in + 16);
+    array_transpose_8x8(in + 24, in + 24);
+
+    IDCT32
+
+    // 1-D: Store 32 intermediate results for each 8x32 block.
+    col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31);
+    col[i32 + 1] = _mm_add_epi16(stp1_1, stp1_30);
+    col[i32 + 2] = _mm_add_epi16(stp1_2, stp1_29);
+    col[i32 + 3] = _mm_add_epi16(stp1_3, stp1_28);
+    col[i32 + 4] = _mm_add_epi16(stp1_4, stp1_27);
+    col[i32 + 5] = _mm_add_epi16(stp1_5, stp1_26);
+    col[i32 + 6] = _mm_add_epi16(stp1_6, stp1_25);
+    col[i32 + 7] = _mm_add_epi16(stp1_7, stp1_24);
+    col[i32 + 8] = _mm_add_epi16(stp1_8, stp1_23);
+    col[i32 + 9] = _mm_add_epi16(stp1_9, stp1_22);
+    col[i32 + 10] = _mm_add_epi16(stp1_10, stp1_21);
+    col[i32 + 11] = _mm_add_epi16(stp1_11, stp1_20);
+    col[i32 + 12] = _mm_add_epi16(stp1_12, stp1_19);
+    col[i32 + 13] = _mm_add_epi16(stp1_13, stp1_18);
+    col[i32 + 14] = _mm_add_epi16(stp1_14, stp1_17);
+    col[i32 + 15] = _mm_add_epi16(stp1_15, stp1_16);
+    col[i32 + 16] = _mm_sub_epi16(stp1_15, stp1_16);
+    col[i32 + 17] = _mm_sub_epi16(stp1_14, stp1_17);
+    col[i32 + 18] = _mm_sub_epi16(stp1_13, stp1_18);
+    col[i32 + 19] = _mm_sub_epi16(stp1_12, stp1_19);
+    col[i32 + 20] = _mm_sub_epi16(stp1_11, stp1_20);
+    col[i32 + 21] = _mm_sub_epi16(stp1_10, stp1_21);
+    col[i32 + 22] = _mm_sub_epi16(stp1_9, stp1_22);
+    col[i32 + 23] = _mm_sub_epi16(stp1_8, stp1_23);
+    col[i32 + 24] = _mm_sub_epi16(stp1_7, stp1_24);
+    col[i32 + 25] = _mm_sub_epi16(stp1_6, stp1_25);
+    col[i32 + 26] = _mm_sub_epi16(stp1_5, stp1_26);
+    col[i32 + 27] = _mm_sub_epi16(stp1_4, stp1_27);
+    col[i32 + 28] = _mm_sub_epi16(stp1_3, stp1_28);
+    col[i32 + 29] = _mm_sub_epi16(stp1_2, stp1_29);
+    col[i32 + 30] = _mm_sub_epi16(stp1_1, stp1_30);
+    col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31);
+  }
+  for (i = 0; i < 4; i++) {
+    // Second 1-D vp10_idct
+    j = i << 3;
+
+    // Transpose 32x8 block to 8x32 block
+    array_transpose_8x8(col + j, in);
+    array_transpose_8x8(col + j + 32, in + 8);
+    array_transpose_8x8(col + j + 64, in + 16);
+    array_transpose_8x8(col + j + 96, in + 24);
+
+    IDCT32
+
+    // 2-D: Calculate the results and store them to the destination.
+    in[0] = _mm_add_epi16(stp1_0, stp1_31);
+    in[1] = _mm_add_epi16(stp1_1, stp1_30);
+    in[2] = _mm_add_epi16(stp1_2, stp1_29);
+    in[3] = _mm_add_epi16(stp1_3, stp1_28);
+    in[4] = _mm_add_epi16(stp1_4, stp1_27);
+    in[5] = _mm_add_epi16(stp1_5, stp1_26);
+    in[6] = _mm_add_epi16(stp1_6, stp1_25);
+    in[7] = _mm_add_epi16(stp1_7, stp1_24);
+    in[8] = _mm_add_epi16(stp1_8, stp1_23);
+    in[9] = _mm_add_epi16(stp1_9, stp1_22);
+    in[10] = _mm_add_epi16(stp1_10, stp1_21);
+    in[11] = _mm_add_epi16(stp1_11, stp1_20);
+    in[12] = _mm_add_epi16(stp1_12, stp1_19);
+    in[13] = _mm_add_epi16(stp1_13, stp1_18);
+    in[14] = _mm_add_epi16(stp1_14, stp1_17);
+    in[15] = _mm_add_epi16(stp1_15, stp1_16);
+    in[16] = _mm_sub_epi16(stp1_15, stp1_16);
+    in[17] = _mm_sub_epi16(stp1_14, stp1_17);
+    in[18] = _mm_sub_epi16(stp1_13, stp1_18);
+    in[19] = _mm_sub_epi16(stp1_12, stp1_19);
+    in[20] = _mm_sub_epi16(stp1_11, stp1_20);
+    in[21] = _mm_sub_epi16(stp1_10, stp1_21);
+    in[22] = _mm_sub_epi16(stp1_9, stp1_22);
+    in[23] = _mm_sub_epi16(stp1_8, stp1_23);
+    in[24] = _mm_sub_epi16(stp1_7, stp1_24);
+    in[25] = _mm_sub_epi16(stp1_6, stp1_25);
+    in[26] = _mm_sub_epi16(stp1_5, stp1_26);
+    in[27] = _mm_sub_epi16(stp1_4, stp1_27);
+    in[28] = _mm_sub_epi16(stp1_3, stp1_28);
+    in[29] = _mm_sub_epi16(stp1_2, stp1_29);
+    in[30] = _mm_sub_epi16(stp1_1, stp1_30);
+    in[31] = _mm_sub_epi16(stp1_0, stp1_31);
+
+    for (j = 0; j < 32; ++j) {
+      // Final rounding and shift
+      in[j] = _mm_adds_epi16(in[j], final_rounding);
+      in[j] = _mm_srai_epi16(in[j], 6);
+      RECON_AND_STORE(dest + j * stride, in[j]);
+    }
+
+    dest += 8;
+  }
+}
+
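+// DC-only case: a single non-zero (DC) coefficient.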
+void vp10_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest,
+                               int stride) {
+  __m128i dc_value;
+  const __m128i zero = _mm_setzero_si128();
+  int a, i;
+
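+  // The DC term is scaled by cospi_16_64 (cos(pi/4) in Q14) in each of the
+  // two 1-D passes, hence the two round-shifts, followed by the same final
+  // shift of 6 used by the full transforms above.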
+  a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+  a = (int)dct_const_round_shift(a * cospi_16_64);
+  a = ROUND_POWER_OF_TWO(a, 6);
+
+  dc_value = _mm_set1_epi16(a);
+
+  for (i = 0; i < 4; ++i) {
+    int j;
+    for (j = 0; j < 32; ++j) {
+      RECON_AND_STORE(dest + j * stride, dc_value);
+    }
+    dest += 8;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
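+// Clamp 16-bit pixel values to [0, 2^bd - 1]. SSE2 lacks unsigned 16-bit
+// min/max, so the upper bound is applied with a compare/select, and
+// negative values are zeroed through a greater-than-zero mask.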
+static INLINE __m128i clamp_high_sse2(__m128i value, int bd) {
+  __m128i ubounded, retval;
+  const __m128i zero = _mm_set1_epi16(0);
+  const __m128i one = _mm_set1_epi16(1);
+  const __m128i max = _mm_subs_epi16(_mm_slli_epi16(one, bd), one);
+  ubounded = _mm_cmpgt_epi16(value, max);
+  retval = _mm_andnot_si128(ubounded, value);
+  ubounded = _mm_and_si128(ubounded, max);
+  retval = _mm_or_si128(retval, ubounded);
+  retval = _mm_and_si128(retval, _mm_cmpgt_epi16(retval, zero));
+  return retval;
+}
+
+void vp10_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
+                                     int stride, int bd) {
+  tran_low_t out[4 * 4];
+  tran_low_t *outptr = out;
+  int i, j;
+  __m128i inptr[4];
+  __m128i sign_bits[2];
+  __m128i temp_mm, min_input, max_input;
+  int test;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  int optimised_cols = 0;
+  const __m128i zero = _mm_set1_epi16(0);
+  const __m128i eight = _mm_set1_epi16(8);
+  const __m128i max = _mm_set1_epi16(12043);
+  const __m128i min = _mm_set1_epi16(-12043);
+  // Load input into __m128i
+  inptr[0] = _mm_loadu_si128((const __m128i *)input);
+  inptr[1] = _mm_loadu_si128((const __m128i *)(input + 4));
+  inptr[2] = _mm_loadu_si128((const __m128i *)(input + 8));
+  inptr[3] = _mm_loadu_si128((const __m128i *)(input + 12));
+
+  // Pack to 16 bits
+  inptr[0] = _mm_packs_epi32(inptr[0], inptr[1]);
+  inptr[1] = _mm_packs_epi32(inptr[2], inptr[3]);
+
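+  // Coefficients outside +/-12043 could overflow the 16-bit 4-point
+  // transform, so such blocks fall back to the high-precision C path.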
+  max_input = _mm_max_epi16(inptr[0], inptr[1]);
+  min_input = _mm_min_epi16(inptr[0], inptr[1]);
+  max_input = _mm_cmpgt_epi16(max_input, max);
+  min_input = _mm_cmplt_epi16(min_input, min);
+  temp_mm = _mm_or_si128(max_input, min_input);
+  test = _mm_movemask_epi8(temp_mm);
+
+  if (!test) {
+    // Do the row transform
+    vp10_idct4_sse2(inptr);
+
+    // Check the min & max values
+    max_input = _mm_max_epi16(inptr[0], inptr[1]);
+    min_input = _mm_min_epi16(inptr[0], inptr[1]);
+    max_input = _mm_cmpgt_epi16(max_input, max);
+    min_input = _mm_cmplt_epi16(min_input, min);
+    temp_mm = _mm_or_si128(max_input, min_input);
+    test = _mm_movemask_epi8(temp_mm);
+
+    if (test) {
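+      // The row-transform output is too large for the 16-bit column path:
+      // sign-extend it to 32 bits into out[] for the C column transform.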
+      transpose_4x4(inptr);
+      sign_bits[0] = _mm_cmplt_epi16(inptr[0], zero);
+      sign_bits[1] = _mm_cmplt_epi16(inptr[1], zero);
+      inptr[3] = _mm_unpackhi_epi16(inptr[1], sign_bits[1]);
+      inptr[2] = _mm_unpacklo_epi16(inptr[1], sign_bits[1]);
+      inptr[1] = _mm_unpackhi_epi16(inptr[0], sign_bits[0]);
+      inptr[0] = _mm_unpacklo_epi16(inptr[0], sign_bits[0]);
+      _mm_storeu_si128((__m128i *)outptr, inptr[0]);
+      _mm_storeu_si128((__m128i *)(outptr + 4), inptr[1]);
+      _mm_storeu_si128((__m128i *)(outptr + 8), inptr[2]);
+      _mm_storeu_si128((__m128i *)(outptr + 12), inptr[3]);
+    } else {
+      // Set to use the optimised transform for the column
+      optimised_cols = 1;
+    }
+  } else {
+    // Run the un-optimised row transform
+    for (i = 0; i < 4; ++i) {
+      vp10_highbd_idct4_c(input, outptr, bd);
+      input += 4;
+      outptr += 4;
+    }
+  }
+
+  if (optimised_cols) {
+    vp10_idct4_sse2(inptr);
+
+    // Final rounding and shift
+    inptr[0] = _mm_add_epi16(inptr[0], eight);
+    inptr[1] = _mm_add_epi16(inptr[1], eight);
+
+    inptr[0] = _mm_srai_epi16(inptr[0], 4);
+    inptr[1] = _mm_srai_epi16(inptr[1], 4);
+
+    // Reconstruction and Store
+    {
+      __m128i d0 = _mm_loadl_epi64((const __m128i *)dest);
+      __m128i d2 = _mm_loadl_epi64((const __m128i *)(dest + stride * 2));
+      d0 = _mm_unpacklo_epi64(
+          d0, _mm_loadl_epi64((const __m128i *)(dest + stride)));
+      d2 = _mm_unpacklo_epi64(
+          d2, _mm_loadl_epi64((const __m128i *)(dest + stride * 3)));
+      d0 = clamp_high_sse2(_mm_adds_epi16(d0, inptr[0]), bd);
+      d2 = clamp_high_sse2(_mm_adds_epi16(d2, inptr[1]), bd);
+      // Store row 0.
+      _mm_storel_epi64((__m128i *)dest, d0);
+      // Store row 1.
+      d0 = _mm_srli_si128(d0, 8);
+      _mm_storel_epi64((__m128i *)(dest + stride), d0);
+      // Store row 2.
+      _mm_storel_epi64((__m128i *)(dest + stride * 2), d2);
+      // Store row 3.
+      d2 = _mm_srli_si128(d2, 8);
+      _mm_storel_epi64((__m128i *)(dest + stride * 3), d2);
+    }
+  } else {
+    // Run the un-optimised column transform
+    tran_low_t temp_in[4], temp_out[4];
+    // Columns
+    for (i = 0; i < 4; ++i) {
+      for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
+      vp10_highbd_idct4_c(temp_in, temp_out, bd);
+      for (j = 0; j < 4; ++j) {
+        dest[j * stride + i] = highbd_clip_pixel_add(
+            dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
+      }
+    }
+  }
+}
+
+void vp10_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
+                                     int stride, int bd) {
+  tran_low_t out[8 * 8];
+  tran_low_t *outptr = out;
+  int i, j, test;
+  __m128i inptr[8];
+  __m128i min_input, max_input, temp1, temp2, sign_bits;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  const __m128i zero = _mm_set1_epi16(0);
+  const __m128i sixteen = _mm_set1_epi16(16);
+  const __m128i max = _mm_set1_epi16(6201);
+  const __m128i min = _mm_set1_epi16(-6201);
+  int optimised_cols = 0;
+
+  // Load input into __m128i & pack to 16 bits
+  for (i = 0; i < 8; i++) {
+    temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i));
+    temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4));
+    inptr[i] = _mm_packs_epi32(temp1, temp2);
+  }
+
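+  // Coefficients outside +/-6201 could overflow the 16-bit 8-point
+  // transform, so such blocks fall back to the high-precision C path.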
+  // Find the min & max for the row transform
+  max_input = _mm_max_epi16(inptr[0], inptr[1]);
+  min_input = _mm_min_epi16(inptr[0], inptr[1]);
+  for (i = 2; i < 8; i++) {
+    max_input = _mm_max_epi16(max_input, inptr[i]);
+    min_input = _mm_min_epi16(min_input, inptr[i]);
+  }
+  max_input = _mm_cmpgt_epi16(max_input, max);
+  min_input = _mm_cmplt_epi16(min_input, min);
+  temp1 = _mm_or_si128(max_input, min_input);
+  test = _mm_movemask_epi8(temp1);
+
+  if (!test) {
+    // Do the row transform
+    vp10_idct8_sse2(inptr);
+
+    // Find the min & max for the column transform
+    max_input = _mm_max_epi16(inptr[0], inptr[1]);
+    min_input = _mm_min_epi16(inptr[0], inptr[1]);
+    for (i = 2; i < 8; i++) {
+      max_input = _mm_max_epi16(max_input, inptr[i]);
+      min_input = _mm_min_epi16(min_input, inptr[i]);
+    }
+    max_input = _mm_cmpgt_epi16(max_input, max);
+    min_input = _mm_cmplt_epi16(min_input, min);
+    temp1 = _mm_or_si128(max_input, min_input);
+    test = _mm_movemask_epi8(temp1);
+
+    if (test) {
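+      // The row-transform output is too large for the 16-bit column path:
+      // sign-extend it to 32 bits into out[] for the C column transform.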
+      array_transpose_8x8(inptr, inptr);
+      for (i = 0; i < 8; i++) {
+        sign_bits = _mm_cmplt_epi16(inptr[i], zero);
+        temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits);
+        temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2);
+      }
+    } else {
+      // Set to use the optimised transform for the column
+      optimised_cols = 1;
+    }
+  } else {
+    // Run the un-optimised row transform
+    for (i = 0; i < 8; ++i) {
+      vp10_highbd_idct8_c(input, outptr, bd);
+      input += 8;
+      outptr += 8;
+    }
+  }
+
+  if (optimised_cols) {
+    vp10_idct8_sse2(inptr);
+
+    // Final rounding & shift, reconstruction, and store
+    {
+      __m128i d[8];
+      for (i = 0; i < 8; i++) {
+        inptr[i] = _mm_add_epi16(inptr[i], sixteen);
+        d[i] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
+        inptr[i] = _mm_srai_epi16(inptr[i], 5);
+        d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd);
+        // Store
+        _mm_storeu_si128((__m128i *)(dest + stride * i), d[i]);
+      }
+    }
+  } else {
+    // Run the un-optimised column transform
+    tran_low_t temp_in[8], temp_out[8];
+    for (i = 0; i < 8; ++i) {
+      for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
+      vp10_highbd_idct8_c(temp_in, temp_out, bd);
+      for (j = 0; j < 8; ++j) {
+        dest[j * stride + i] = highbd_clip_pixel_add(
+            dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
+      }
+    }
+  }
+}
+
+void vp10_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
+                                     int stride, int bd) {
+  tran_low_t out[8 * 8] = { 0 };
+  tran_low_t *outptr = out;
+  int i, j, test;
+  __m128i inptr[8];
+  __m128i min_input, max_input, temp1, temp2, sign_bits;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  const __m128i zero = _mm_set1_epi16(0);
+  const __m128i sixteen = _mm_set1_epi16(16);
+  const __m128i max = _mm_set1_epi16(6201);
+  const __m128i min = _mm_set1_epi16(-6201);
+  int optimised_cols = 0;
+
+  // Load input into __m128i & pack to 16 bits
+  for (i = 0; i < 8; i++) {
+    temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i));
+    temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4));
+    inptr[i] = _mm_packs_epi32(temp1, temp2);
+  }
+
+  // Find the min & max for the row transform
+  // (only the first 4 rows have non-zero coeffs).
+  max_input = _mm_max_epi16(inptr[0], inptr[1]);
+  min_input = _mm_min_epi16(inptr[0], inptr[1]);
+  for (i = 2; i < 4; i++) {
+    max_input = _mm_max_epi16(max_input, inptr[i]);
+    min_input = _mm_min_epi16(min_input, inptr[i]);
+  }
+  max_input = _mm_cmpgt_epi16(max_input, max);
+  min_input = _mm_cmplt_epi16(min_input, min);
+  temp1 = _mm_or_si128(max_input, min_input);
+  test = _mm_movemask_epi8(temp1);
+
+  if (!test) {
+    // Do the row transform
+    vp10_idct8_sse2(inptr);
+
+    // Find the min & max for the column transform
+    // N.B. Only the first 4 columns contain non-zero coeffs.
+    max_input = _mm_max_epi16(inptr[0], inptr[1]);
+    min_input = _mm_min_epi16(inptr[0], inptr[1]);
+    for (i = 2; i < 8; i++) {
+      max_input = _mm_max_epi16(max_input, inptr[i]);
+      min_input = _mm_min_epi16(min_input, inptr[i]);
+    }
+    max_input = _mm_cmpgt_epi16(max_input, max);
+    min_input = _mm_cmplt_epi16(min_input, min);
+    temp1 = _mm_or_si128(max_input, min_input);
+    test = _mm_movemask_epi8(temp1);
+
+    if (test) {
+      // Use the fact that only the first 4 rows contain non-zero coeffs
+      array_transpose_4X8(inptr, inptr);
+      for (i = 0; i < 4; i++) {
+        sign_bits = _mm_cmplt_epi16(inptr[i], zero);
+        temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits);
+        temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2);
+      }
+    } else {
+      // Flag that the optimised transform can be used for the columns
+      optimised_cols = 1;
+    }
+  } else {
+    // Run the un-optimised row transform
+    for (i = 0; i < 4; ++i) {
+      vp10_highbd_idct8_c(input, outptr, bd);
+      input += 8;
+      outptr += 8;
+    }
+  }
+
+  if (optimised_cols) {
+    vp10_idct8_sse2(inptr);
+
+    // Final rounding and shift, then reconstruct and store
+    {
+      __m128i d[8];
+      for (i = 0; i < 8; i++) {
+        inptr[i] = _mm_add_epi16(inptr[i], sixteen);
+        d[i] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
+        inptr[i] = _mm_srai_epi16(inptr[i], 5);
+        d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd);
+        // Store
+        _mm_storeu_si128((__m128i *)(dest + stride * i), d[i]);
+      }
+    }
+  } else {
+    // Run the un-optimised column transform
+    tran_low_t temp_in[8], temp_out[8];
+    for (i = 0; i < 8; ++i) {
+      for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
+      vp10_highbd_idct8_c(temp_in, temp_out, bd);
+      for (j = 0; j < 8; ++j) {
+        dest[j * stride + i] = highbd_clip_pixel_add(
+            dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
+      }
+    }
+  }
+}
+
+void vp10_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,
+                                        int stride, int bd) {
+  tran_low_t out[16 * 16];
+  tran_low_t *outptr = out;
+  int i, j, test;
+  __m128i inptr[32];
+  __m128i min_input, max_input, temp1, temp2, sign_bits;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  const __m128i zero = _mm_set1_epi16(0);
+  const __m128i rounding = _mm_set1_epi16(32);
+  const __m128i max = _mm_set1_epi16(3155);
+  const __m128i min = _mm_set1_epi16(-3155);
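+  // As with 6201 in the 8x8 case, +/-3155 is (we assume) the bound below
+  // which the 16-bit idct16 arithmetic cannot overflow.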
+  int optimised_cols = 0;
+
+  // Load input into __m128i & pack to 16 bits
+  for (i = 0; i < 16; i++) {
+    temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i));
+    temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4));
+    inptr[i] = _mm_packs_epi32(temp1, temp2);
+    temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8));
+    temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12));
+    inptr[i + 16] = _mm_packs_epi32(temp1, temp2);
+  }
+
+  // Find the min & max for the row transform
+  max_input = _mm_max_epi16(inptr[0], inptr[1]);
+  min_input = _mm_min_epi16(inptr[0], inptr[1]);
+  for (i = 2; i < 32; i++) {
+    max_input = _mm_max_epi16(max_input, inptr[i]);
+    min_input = _mm_min_epi16(min_input, inptr[i]);
+  }
+  max_input = _mm_cmpgt_epi16(max_input, max);
+  min_input = _mm_cmplt_epi16(min_input, min);
+  temp1 = _mm_or_si128(max_input, min_input);
+  test = _mm_movemask_epi8(temp1);
+
+  if (!test) {
+    // Do the row transform
+    vp10_idct16_sse2(inptr, inptr + 16);
+
+    // Find the min & max for the column transform
+    max_input = _mm_max_epi16(inptr[0], inptr[1]);
+    min_input = _mm_min_epi16(inptr[0], inptr[1]);
+    for (i = 2; i < 32; i++) {
+      max_input = _mm_max_epi16(max_input, inptr[i]);
+      min_input = _mm_min_epi16(min_input, inptr[i]);
+    }
+    max_input = _mm_cmpgt_epi16(max_input, max);
+    min_input = _mm_cmplt_epi16(min_input, min);
+    temp1 = _mm_or_si128(max_input, min_input);
+    test = _mm_movemask_epi8(temp1);
+
+    if (test) {
+      array_transpose_16x16(inptr, inptr + 16);
+      for (i = 0; i < 16; i++) {
+        sign_bits = _mm_cmplt_epi16(inptr[i], zero);
+        temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits);
+        temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2);
+        sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero);
+        temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits);
+        temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2);
+      }
+    } else {
+      // Flag that the optimised transform can be used for the columns
+      optimised_cols = 1;
+    }
+  } else {
+    // Run the un-optimised row transform
+    for (i = 0; i < 16; ++i) {
+      vp10_highbd_idct16_c(input, outptr, bd);
+      input += 16;
+      outptr += 16;
+    }
+  }
+
+  if (optimised_cols) {
+    vp10_idct16_sse2(inptr, inptr + 16);
+
+    // Final rounding and shift, then reconstruct and store
+    {
+      __m128i d[2];
+      for (i = 0; i < 16; i++) {
+        inptr[i] = _mm_add_epi16(inptr[i], rounding);
+        inptr[i + 16] = _mm_add_epi16(inptr[i + 16], rounding);
+        d[0] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
+        d[1] = _mm_loadu_si128((const __m128i *)(dest + stride * i + 8));
+        inptr[i] = _mm_srai_epi16(inptr[i], 6);
+        inptr[i + 16] = _mm_srai_epi16(inptr[i + 16], 6);
+        d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i]), bd);
+        d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i + 16]), bd);
+        // Store
+        _mm_storeu_si128((__m128i *)(dest + stride * i), d[0]);
+        _mm_storeu_si128((__m128i *)(dest + stride * i + 8), d[1]);
+      }
+    }
+  } else {
+    // Run the un-optimised column transform
+    tran_low_t temp_in[16], temp_out[16];
+    for (i = 0; i < 16; ++i) {
+      for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
+      vp10_highbd_idct16_c(temp_in, temp_out, bd);
+      for (j = 0; j < 16; ++j) {
+        dest[j * stride + i] = highbd_clip_pixel_add(
+            dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+      }
+    }
+  }
+}
+
+void vp10_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
+                                       int stride, int bd) {
+  tran_low_t out[16 * 16] = { 0 };
+  tran_low_t *outptr = out;
+  int i, j, test;
+  __m128i inptr[32];
+  __m128i min_input, max_input, temp1, temp2, sign_bits;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  const __m128i zero = _mm_set1_epi16(0);
+  const __m128i rounding = _mm_set1_epi16(32);
+  const __m128i max = _mm_set1_epi16(3155);
+  const __m128i min = _mm_set1_epi16(-3155);
+  int optimised_cols = 0;
+
+  // Load input into __m128i & pack to 16 bits
+  for (i = 0; i < 16; i++) {
+    temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i));
+    temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4));
+    inptr[i] = _mm_packs_epi32(temp1, temp2);
+    temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8));
+    temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12));
+    inptr[i + 16] = _mm_packs_epi32(temp1, temp2);
+  }
+
+  // Find the min & max for the row transform
+  // Since all the non-zero dct coefficients are in the upper-left 4x4 area,
+  // we only need to consider the first 4 rows here.
+  max_input = _mm_max_epi16(inptr[0], inptr[1]);
+  min_input = _mm_min_epi16(inptr[0], inptr[1]);
+  for (i = 2; i < 4; i++) {
+    max_input = _mm_max_epi16(max_input, inptr[i]);
+    min_input = _mm_min_epi16(min_input, inptr[i]);
+  }
+  max_input = _mm_cmpgt_epi16(max_input, max);
+  min_input = _mm_cmplt_epi16(min_input, min);
+  temp1 = _mm_or_si128(max_input, min_input);
+  test = _mm_movemask_epi8(temp1);
+
+  if (!test) {
+    // Do the row transform (N.B. This transposes inptr)
+    vp10_idct16_sse2(inptr, inptr + 16);
+
+    // Find the min & max for the column transform
+    // N.B. Only the first 4 cols contain non-zero coeffs
+    max_input = _mm_max_epi16(inptr[0], inptr[1]);
+    min_input = _mm_min_epi16(inptr[0], inptr[1]);
+    for (i = 2; i < 16; i++) {
+      max_input = _mm_max_epi16(max_input, inptr[i]);
+      min_input = _mm_min_epi16(min_input, inptr[i]);
+    }
+    max_input = _mm_cmpgt_epi16(max_input, max);
+    min_input = _mm_cmplt_epi16(min_input, min);
+    temp1 = _mm_or_si128(max_input, min_input);
+    test = _mm_movemask_epi8(temp1);
+
+    if (test) {
+      // Use the fact that only the first 4 rows contain non-zero coeffs
+      array_transpose_8x8(inptr, inptr);
+      array_transpose_8x8(inptr + 8, inptr + 16);
+      for (i = 0; i < 4; i++) {
+        sign_bits = _mm_cmplt_epi16(inptr[i], zero);
+        temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits);
+        temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2);
+        sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero);
+        temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits);
+        temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1);
+        _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2);
+      }
+    } else {
+      // Flag that the optimised transform can be used for the columns
+      optimised_cols = 1;
+    }
+  } else {
+    // Run the un-optimised row transform
+    for (i = 0; i < 4; ++i) {
+      vp10_highbd_idct16_c(input, outptr, bd);
+      input += 16;
+      outptr += 16;
+    }
+  }
+
+  if (optimised_cols) {
+    vp10_idct16_sse2(inptr, inptr + 16);
+
+    // Final rounding and shift, then reconstruct and store
+    {
+      __m128i d[2];
+      for (i = 0; i < 16; i++) {
+        inptr[i] = _mm_add_epi16(inptr[i], rounding);
+        inptr[i + 16] = _mm_add_epi16(inptr[i + 16], rounding);
+        d[0] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
+        d[1] = _mm_loadu_si128((const __m128i *)(dest + stride * i + 8));
+        inptr[i] = _mm_srai_epi16(inptr[i], 6);
+        inptr[i + 16] = _mm_srai_epi16(inptr[i + 16], 6);
+        d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i]), bd);
+        d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i + 16]), bd);
+        // Store
+        _mm_storeu_si128((__m128i *)(dest + stride * i), d[0]);
+        _mm_storeu_si128((__m128i *)(dest + stride * i + 8), d[1]);
+      }
+    }
+  } else {
+    // Run the un-optimised column transform
+    tran_low_t temp_in[16], temp_out[16];
+    for (i = 0; i < 16; ++i) {
+      for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
+      vp10_highbd_idct16_c(temp_in, temp_out, bd);
+      for (j = 0; j < 16; ++j) {
+        dest[j * stride + i] = highbd_clip_pixel_add(
+            dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+      }
+    }
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/x86/vp10_inv_txfm_sse2.h b/av1/common/x86/vp10_inv_txfm_sse2.h
new file mode 100644
index 0000000..0839ab9
--- /dev/null
+++ b/av1/common/x86/vp10_inv_txfm_sse2.h
@@ -0,0 +1,184 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_X86_INV_TXFM_SSE2_H_
+#define VPX_DSP_X86_INV_TXFM_SSE2_H_
+
+#include <emmintrin.h>  // SSE2
+#include "./vpx_config.h"
+#include "aom/vpx_integer.h"
+#include "av1/common/vp10_inv_txfm.h"
+
+// Perform an 8x8 transpose of 16-bit elements: res[i] receives column i of in
+static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
+  const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
+  const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
+  const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]);
+  const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]);
+  const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
+  const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
+  const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]);
+  const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]);
+
+  const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+  const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+  const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+  const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+  const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+  const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+  const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+  const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+
+  res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1);
+  res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1);
+  res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3);
+  res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3);
+  res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5);
+  res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5);
+  res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7);
+  res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
+}
+
+#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1)   \
+  {                                                     \
+    const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
+    const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
+                                                        \
+    in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \
+    in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \
+  }
+
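+// Transpose the low (first 4) 16-bit lanes of the 8 input registers:
+// out[i] receives column i of the 8x4 input tile; only out[0..3] are
+// written.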
+static INLINE void array_transpose_4X8(__m128i *in, __m128i *out) {
+  const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
+  const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
+  const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
+  const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
+
+  const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+  const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+  const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+  const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+
+  out[0] = _mm_unpacklo_epi64(tr1_0, tr1_4);
+  out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4);
+  out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6);
+  out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6);
+}
+
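+// In-place transpose of a 16x16 tile: res0[0..15] holds the left 8 columns
+// of rows 0..15 and res1[0..15] the right 8 columns, both on entry and on
+// exit.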
+static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
+  __m128i tbuf[8];
+  array_transpose_8x8(res0, res0);
+  array_transpose_8x8(res1, tbuf);
+  array_transpose_8x8(res0 + 8, res1);
+  array_transpose_8x8(res1 + 8, res1 + 8);
+
+  res0[8] = tbuf[0];
+  res0[9] = tbuf[1];
+  res0[10] = tbuf[2];
+  res0[11] = tbuf[3];
+  res0[12] = tbuf[4];
+  res0[13] = tbuf[5];
+  res0[14] = tbuf[6];
+  res0[15] = tbuf[7];
+}
+
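+// Load the first 8 16-bit coefficients of each of the 16 rows of a
+// 16-column coefficient block.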
+static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
+  in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16));
+  in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16));
+  in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16));
+  in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16));
+  in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16));
+  in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16));
+  in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16));
+  in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16));
+
+  in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16));
+  in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16));
+  in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16));
+  in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16));
+  in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16));
+  in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16));
+  in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16));
+  in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16));
+}
+
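+// Reconstruct and store one 8-pixel row: load 8 bytes from dest, widen to
+// 16 bits (a __m128i named `zero` must be in scope), add the residual in_x,
+// then pack with unsigned saturation and store back.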
+#define RECON_AND_STORE(dest, in_x)                  \
+  {                                                  \
+    __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \
+    d0 = _mm_unpacklo_epi8(d0, zero);                \
+    d0 = _mm_add_epi16(in_x, d0);                    \
+    d0 = _mm_packus_epi16(d0, d0);                   \
+    _mm_storel_epi64((__m128i *)(dest), d0);         \
+  }
+
+static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
+  const __m128i final_rounding = _mm_set1_epi16(1 << 5);
+  const __m128i zero = _mm_setzero_si128();
+  // Final rounding and shift
+  in[0] = _mm_adds_epi16(in[0], final_rounding);
+  in[1] = _mm_adds_epi16(in[1], final_rounding);
+  in[2] = _mm_adds_epi16(in[2], final_rounding);
+  in[3] = _mm_adds_epi16(in[3], final_rounding);
+  in[4] = _mm_adds_epi16(in[4], final_rounding);
+  in[5] = _mm_adds_epi16(in[5], final_rounding);
+  in[6] = _mm_adds_epi16(in[6], final_rounding);
+  in[7] = _mm_adds_epi16(in[7], final_rounding);
+  in[8] = _mm_adds_epi16(in[8], final_rounding);
+  in[9] = _mm_adds_epi16(in[9], final_rounding);
+  in[10] = _mm_adds_epi16(in[10], final_rounding);
+  in[11] = _mm_adds_epi16(in[11], final_rounding);
+  in[12] = _mm_adds_epi16(in[12], final_rounding);
+  in[13] = _mm_adds_epi16(in[13], final_rounding);
+  in[14] = _mm_adds_epi16(in[14], final_rounding);
+  in[15] = _mm_adds_epi16(in[15], final_rounding);
+
+  in[0] = _mm_srai_epi16(in[0], 6);
+  in[1] = _mm_srai_epi16(in[1], 6);
+  in[2] = _mm_srai_epi16(in[2], 6);
+  in[3] = _mm_srai_epi16(in[3], 6);
+  in[4] = _mm_srai_epi16(in[4], 6);
+  in[5] = _mm_srai_epi16(in[5], 6);
+  in[6] = _mm_srai_epi16(in[6], 6);
+  in[7] = _mm_srai_epi16(in[7], 6);
+  in[8] = _mm_srai_epi16(in[8], 6);
+  in[9] = _mm_srai_epi16(in[9], 6);
+  in[10] = _mm_srai_epi16(in[10], 6);
+  in[11] = _mm_srai_epi16(in[11], 6);
+  in[12] = _mm_srai_epi16(in[12], 6);
+  in[13] = _mm_srai_epi16(in[13], 6);
+  in[14] = _mm_srai_epi16(in[14], 6);
+  in[15] = _mm_srai_epi16(in[15], 6);
+
+  RECON_AND_STORE(dest + 0 * stride, in[0]);
+  RECON_AND_STORE(dest + 1 * stride, in[1]);
+  RECON_AND_STORE(dest + 2 * stride, in[2]);
+  RECON_AND_STORE(dest + 3 * stride, in[3]);
+  RECON_AND_STORE(dest + 4 * stride, in[4]);
+  RECON_AND_STORE(dest + 5 * stride, in[5]);
+  RECON_AND_STORE(dest + 6 * stride, in[6]);
+  RECON_AND_STORE(dest + 7 * stride, in[7]);
+  RECON_AND_STORE(dest + 8 * stride, in[8]);
+  RECON_AND_STORE(dest + 9 * stride, in[9]);
+  RECON_AND_STORE(dest + 10 * stride, in[10]);
+  RECON_AND_STORE(dest + 11 * stride, in[11]);
+  RECON_AND_STORE(dest + 12 * stride, in[12]);
+  RECON_AND_STORE(dest + 13 * stride, in[13]);
+  RECON_AND_STORE(dest + 14 * stride, in[14]);
+  RECON_AND_STORE(dest + 15 * stride, in[15]);
+}
+
+void idct4_sse2(__m128i *in);
+void idct8_sse2(__m128i *in);
+void idct16_sse2(__m128i *in0, __m128i *in1);
+void iadst4_sse2(__m128i *in);
+void iadst8_sse2(__m128i *in);
+void iadst16_sse2(__m128i *in0, __m128i *in1);
+
+#endif  // VPX_DSP_X86_INV_TXFM_SSE2_H_
diff --git a/av1/common/x86/vp10_txfm1d_sse4.h b/av1/common/x86/vp10_txfm1d_sse4.h
new file mode 100644
index 0000000..f05a54c
--- /dev/null
+++ b/av1/common/x86/vp10_txfm1d_sse4.h
@@ -0,0 +1,144 @@
+#ifndef VP10_TXFM1D_SSE4_H_
+#define VP10_TXFM1D_SSE4_H_
+
+#include <smmintrin.h>
+#include "av1/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
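+// 1-D transforms over rows of four packed 32-bit lanes. Per our reading of
+// the vp10_txfm.h conventions, cos_bit[] gives the cosine-constant precision
+// and stage_range[] the valid dynamic range for each butterfly stage.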
+void vp10_fdct4_new_sse4_1(const __m128i *input, __m128i *output,
+                           const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct8_new_sse4_1(const __m128i *input, __m128i *output,
+                           const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct16_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct32_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct64_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst8_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst16_new_sse4_1(const __m128i *input, __m128i *output,
+                             const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst32_new_sse4_1(const __m128i *input, __m128i *output,
+                             const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_idct4_new_sse4_1(const __m128i *input, __m128i *output,
+                           const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct8_new_sse4_1(const __m128i *input, __m128i *output,
+                           const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct16_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct32_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct64_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_iadst4_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst8_new_sse4_1(const __m128i *input, __m128i *output,
+                            const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst16_new_sse4_1(const __m128i *input, __m128i *output,
+                             const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst32_new_sse4_1(const __m128i *input, __m128i *output,
+                             const int8_t *cos_bit, const int8_t *stage_range);
+
+static INLINE void transpose_32_4x4(int stride, const __m128i *input,
+                                    __m128i *output) {
+  __m128i temp0 = _mm_unpacklo_epi32(input[0 * stride], input[2 * stride]);
+  __m128i temp1 = _mm_unpackhi_epi32(input[0 * stride], input[2 * stride]);
+  __m128i temp2 = _mm_unpacklo_epi32(input[1 * stride], input[3 * stride]);
+  __m128i temp3 = _mm_unpackhi_epi32(input[1 * stride], input[3 * stride]);
+
+  output[0 * stride] = _mm_unpacklo_epi32(temp0, temp2);
+  output[1 * stride] = _mm_unpackhi_epi32(temp0, temp2);
+  output[2 * stride] = _mm_unpacklo_epi32(temp1, temp3);
+  output[3 * stride] = _mm_unpackhi_epi32(temp1, temp3);
+}
+
+// The entire input block can be represented as a grid of 4x4 sub-blocks,
+// each held as 4 vertical __m128i registers. We transpose each 4x4 block
+// internally and, by folding the grid transpose into the output indexing,
+// write it straight to its mirrored grid position.
+static INLINE void transpose_32(int txfm_size, const __m128i *input,
+                                __m128i *output) {
+  const int num_per_128 = 4;
+  const int row_size = txfm_size;
+  const int col_size = txfm_size / num_per_128;
+  int r, c;
+
+  // Transpose each 4x4 block internally and store it at grid position (c, r/4)
+  for (r = 0; r < row_size; r += 4) {
+    for (c = 0; c < col_size; c++) {
+      transpose_32_4x4(col_size, &input[r * col_size + c],
+                       &output[c * 4 * col_size + r / 4]);
+    }
+  }
+}
+
+static INLINE __m128i round_shift_32_sse4_1(__m128i vec, int bit) {
+  __m128i tmp, round;
+  round = _mm_set1_epi32(1 << (bit - 1));
+  tmp = _mm_add_epi32(vec, round);
+  return _mm_srai_epi32(tmp, bit);
+}
+
+static INLINE void round_shift_array_32_sse4_1(__m128i *input, __m128i *output,
+                                               const int size, const int bit) {
+  if (bit > 0) {
+    int i;
+    for (i = 0; i < size; i++) {
+      output[i] = round_shift_32_sse4_1(input[i], bit);
+    }
+  } else {
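+    // A negative bit means a left shift, i.e. scaling up by 2^(-bit).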
+    int i;
+    for (i = 0; i < size; i++) {
+      output[i] = _mm_slli_epi32(input[i], -bit);
+    }
+  }
+}
+
+// out0 = in0*w0 + in1*w1
+// out1 = -in1*w0 + in0*w1
+#define btf_32_sse4_1_type0(w0, w1, in0, in1, out0, out1, bit) \
+  do {                                                         \
+    __m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0;          \
+    ww0 = _mm_set1_epi32(w0);                                  \
+    ww1 = _mm_set1_epi32(w1);                                  \
+    in0_w0 = _mm_mullo_epi32(in0, ww0);                        \
+    in1_w1 = _mm_mullo_epi32(in1, ww1);                        \
+    out0 = _mm_add_epi32(in0_w0, in1_w1);                      \
+    out0 = round_shift_32_sse4_1(out0, bit);                   \
+    in0_w1 = _mm_mullo_epi32(in0, ww1);                        \
+    in1_w0 = _mm_mullo_epi32(in1, ww0);                        \
+    out1 = _mm_sub_epi32(in0_w1, in1_w0);                      \
+    out1 = round_shift_32_sse4_1(out1, bit);                   \
+  } while (0)
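+// Illustrative use: with w0 == w1 == c the type0 butterfly reduces to a
+// scaled sum/difference,
+//   btf_32_sse4_1_type0(c, c, x0, x1, y0, y1, bit);
+//   // y0 = round_shift(c * (x0 + x1), bit)
+//   // y1 = round_shift(c * (x0 - x1), bit)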
+
+// out0 = in0*w0 + in1*w1
+// out1 = in1*w0 - in0*w1
+#define btf_32_sse4_1_type1(w0, w1, in0, in1, out0, out1, bit) \
+  do {                                                         \
+    __m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0;          \
+    ww0 = _mm_set1_epi32(w0);                                  \
+    ww1 = _mm_set1_epi32(w1);                                  \
+    in0_w0 = _mm_mullo_epi32(in0, ww0);                        \
+    in1_w1 = _mm_mullo_epi32(in1, ww1);                        \
+    out0 = _mm_add_epi32(in0_w0, in1_w1);                      \
+    out0 = round_shift_32_sse4_1(out0, bit);                   \
+    in0_w1 = _mm_mullo_epi32(in0, ww1);                        \
+    in1_w0 = _mm_mullo_epi32(in1, ww0);                        \
+    out1 = _mm_sub_epi32(in1_w0, in0_w1);                      \
+    out1 = round_shift_32_sse4_1(out1, bit);                   \
+  } while (0)
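+// The two variants differ only in the sign of out1. Both are wrapped in
+// do { ... } while (0) so that each expands to a single statement.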
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // VP10_TXFM1D_SSE4_H_